From 7fbc7af440e37205dad8d7400134acc730f53912 Mon Sep 17 00:00:00 2001 From: "[6~" Date: Tue, 12 Nov 2019 22:05:06 -0600 Subject: [PATCH 001/460] Add wave equation solver on the way to diamond tiling demo --- test/test_transform.py | 32 +++++++++++++++++++++++++++++++- 1 file changed, 31 insertions(+), 1 deletion(-) diff --git a/test/test_transform.py b/test/test_transform.py index cdc0c14b8..0906c9b34 100644 --- a/test/test_transform.py +++ b/test/test_transform.py @@ -552,7 +552,6 @@ def test_split_iname_only_if_in_within(): def test_nested_substs_in_insns(ctx_factory): ctx = ctx_factory() - import loopy as lp ref_knl = lp.make_kernel( "{[i]: 0<=i<10}", @@ -570,6 +569,37 @@ def test_nested_substs_in_insns(ctx_factory): lp.auto_test_vs_ref(ref_knl, ctx, knl) +def test_diamond_tiling(ctx_factory): + ctx = ctx_factory() + queue = cl.CommandQueue(ctx) + + ref_knl = lp.make_kernel( + "[nx,nt] -> {[ix, it]: 1<=ix 1: exec(sys.argv[1]) From af822259a1be28cf30f710fe0bbd4bf731ef54b5 Mon Sep 17 00:00:00 2001 From: "[6~" Date: Wed, 13 Nov 2019 00:56:16 -0600 Subject: [PATCH 002/460] Add initial prototype of map_domain and (currently broken) test of diamond tiling --- loopy/__init__.py | 4 +- loopy/transform/iname.py | 234 +++++++++++++++++++++++++++++++++++++++ test/test_transform.py | 30 ++++- 3 files changed, 263 insertions(+), 5 deletions(-) diff --git a/loopy/__init__.py b/loopy/__init__.py index b60de6e2d..f8a43df6d 100644 --- a/loopy/__init__.py +++ b/loopy/__init__.py @@ -78,7 +78,7 @@ affine_map_inames, find_unused_axis_tag, make_reduction_inames_unique, has_schedulable_iname_nesting, get_iname_duplication_options, - add_inames_to_insn) + add_inames_to_insn, map_domain) from loopy.transform.instruction import ( find_instructions, map_instructions, @@ -195,7 +195,7 @@ "affine_map_inames", "find_unused_axis_tag", "make_reduction_inames_unique", "has_schedulable_iname_nesting", "get_iname_duplication_options", - "add_inames_to_insn", + "add_inames_to_insn", 
"map_domain", "add_prefetch", "change_arg_to_image", "tag_array_axes", "tag_data_axes", diff --git a/loopy/transform/iname.py b/loopy/transform/iname.py index 96c8252ef..e627604c1 100644 --- a/loopy/transform/iname.py +++ b/loopy/transform/iname.py @@ -1759,4 +1759,238 @@ def add_inames_to_insn(knl, inames, insn_match): # }}} +# {{{ map_domain + +class _MapDomainMapper(RuleAwareIdentityMapper): + def __init__(self, rule_mapping_context, within, new_inames, substitutions): + super(_MapDomainMapper, self).__init__(rule_mapping_context) + + self.within = within + + self.old_inames = frozenset(substitutions) + self.new_inames = new_inames + + self.substitutions = substitutions + + def map_reduction(self, expr, expn_state): + overlap = frozenset(expr.inames) & self.old_inames + if (overlap + and self.split_iname not in expn_state.arg_context + and self.within( + expn_state.kernel, + expn_state.instruction)): + # FIXME + if len(overlap) != len(self.old_inames): + raise LoopyError(...) + + raise NotImplementedError("reductions") + new_inames = list(expr.inames) + new_inames.remove(self.split_iname) + new_inames.extend([self.outer_iname, self.inner_iname]) + + from loopy.symbolic import Reduction + return Reduction(expr.operation, tuple(new_inames), + self.rec(expr.expr, expn_state), + expr.allow_simultaneous) + else: + return super(_MapDomainMapper, self).map_reduction(expr, expn_state) + + def map_variable(self, expr, expn_state): + if (expr.name in self.old_inames + and expr.name not in expn_state.arg_context + and self.within( + expn_state.kernel, + expn_state.instruction)): + return self.substitutions[expr.name] + else: + return super(_MapDomainMapper, self).map_variable(expr, expn_state) + + +def _find_aff_subst_from_map(iname, isl_map): + if not isinstance(isl_map, isl.BasicMap): + raise RuntimeError("isl_map must be a BasicMap") + + dt, dim_idx = isl_map.get_var_dict()[iname] + + assert dt == dim_type.in_ + + # Force isl to solve for only this iname on its side of 
the map, by + # projecting out all other "in" variables. + isl_map = isl_map.project_out(dt, dim_idx+1, isl_map.dim(dt)-(dim_idx+1)) + isl_map = isl_map.project_out(dt, 0, dim_idx) + dim_idx = 0 + + # Convert map to set to avoid "domain of affine expression should be a set". + # The old "in" variable will be the last of the out_dims. + new_dim_idx = isl_map.dim(dim_type.out) + isl_map = isl_map.move_dims( + dim_type.out, isl_map.dim(dim_type.out), + dt, dim_idx, 1) + isl_map = isl_map.range() # now a set + dt = dim_type.set + dim_idx = new_dim_idx + del new_dim_idx + + for cns in isl_map.get_constraints(): + if cns.is_equality() and cns.involves_dims(dt, dim_idx, 1): + coeff = cns.get_coefficient_val(dt, dim_idx) + cns_zeroed = cns.set_coefficient_val(dt, dim_idx, 0) + if cns_zeroed.involves_dims(dt, dim_idx, 1): + # not suitable, constraint still involves dim, perhaps in a div + continue + + if coeff.is_one(): + return -cns_zeroed.get_aff() + elif coeff.is_negone(): + return cns_zeroed.get_aff() + else: + # not suitable, coefficient does not have unit coefficient + continue + + raise LoopyError("no suitable equation for '%s' found" % iname) + + +def map_domain(kernel, isl_map, within=None): + # FIXME: Express _split_iname_backend in terms of this + # Missing/deleted for now: + # - slab processing + # - priorities processing + # FIXME: Process priorities + # FIXME: Express affine_map_inames in terms of this, deprecate + # FIXME: Document + + # FIXME: Support within + + # {{{ within processing (disabled for now) + if within is not None: + raise NotImplementedError("within") + + from loopy.match import parse_match + within = parse_match(within) + + # {{{ return the same kernel if no kernel matches + + def _do_not_transform_if_no_within_matches(): + for insn in kernel.instructions: + if within(kernel, insn): + return + + return kernel + + _do_not_transform_if_no_within_matches() + + # }}} + + # }}} + + if not isl_map.is_bijective(): + raise LoopyError("isl_map must be 
bijective") + + new_inames = frozenset(isl_map.get_var_dict(dim_type.out)) + old_inames = frozenset(isl_map.get_var_dict(dim_type.in_)) + + # {{{ solve for representation of old inames in terms of new + + substitutions = {} + var_substitutions = {} + applied_iname_rewrites = kernel.applied_iname_rewrites[:] + + from loopy.symbolic import aff_to_expr + from pymbolic import var + for iname in old_inames: + substitutions[iname] = aff_to_expr( + _find_aff_subst_from_map(iname, isl_map)) + var_substitutions[var(iname)] = aff_to_expr( + _find_aff_subst_from_map(iname, isl_map)) + + applied_iname_rewrites.append(var_substitutions) + del var_substitutions + + # }}} + + def process_set(s): + var_dict = s.get_var_dict() + + overlap = old_inames & frozenset(var_dict) + if overlap and len(overlap) != len(old_inames): + raise LoopyError("loop domain '%s' involves a part " + "of the map domain inames. Domains must " + "either involve all or none of the map domain " + "inames." % s) + + # {{{ align dims of isl_map and s + + # FIXME: Make this less gross + # FIXME: Make an exported/documented interface of this in islpy + from islpy import _align_dim_type + + map_with_s_domain = isl.Map.from_domain(s) + + dim_types = [dim_type.param, dim_type.in_, dim_type.out] + s_names = [ + map_with_s_domain.get_dim_name(dt, i) + for dt in dim_types + for i in range(map_with_s_domain.dim(dt)) + ] + map_names = [ + isl_map.get_dim_name(dt, i) + for dt in dim_types + for i in range(isl_map.dim(dt)) + ] + aligned_map = _align_dim_type( + dim_type.param, + isl_map, map_with_s_domain, obj_bigger_ok=False, + obj_names=map_names, tgt_names=s_names) + aligned_map = _align_dim_type( + dim_type.in_, + isl_map, map_with_s_domain, obj_bigger_ok=False, + obj_names=map_names, tgt_names=s_names) + + # }}} + + return aligned_map.intersect_domain(s).range() + + # FIXME: Revive _project_out_only_if_all_instructions_in_within + + new_domains = [process_set(dom) for dom in kernel.domains] + + # {{{ update 
within_inames + + new_insns = [] + for insn in kernel.instructions: + overlap = old_inames & insn.within_inames + if overlap and within(kernel, insn): + if len(overlap) != len(old_inames): + raise LoopyError("instruction '%s' is within only a part " + "of the map domain inames. Instructions must " + "either be within all or none of the map domain " + "inames." % insn.id) + + insn = insn.copy( + within_inames=(insn.within_inames - old_inames) | new_inames) + else: + # leave insn unmodified + pass + + new_insns.append(insn) + + # }}} + + kernel = kernel.copy( + domains=new_domains, + instructions=new_insns, + applied_iname_rewrites=applied_iname_rewrites) + + rule_mapping_context = SubstitutionRuleMappingContext( + kernel.substitutions, kernel.get_var_name_generator()) + ins = _MapDomainMapper(rule_mapping_context, within, + new_inames, substitutions) + + kernel = ins.map_kernel(kernel) + kernel = rule_mapping_context.finish_kernel(kernel) + + return kernel + +# }}} + # vim: foldmethod=marker diff --git a/test/test_transform.py b/test/test_transform.py index 0906c9b34..9b184f4b0 100644 --- a/test/test_transform.py +++ b/test/test_transform.py @@ -578,21 +578,45 @@ def test_diamond_tiling(ctx_factory): """ u[ix, it+2] = ( 2*u[ix, it+1] - + dt**2/dx**2 * (u[ix+1, it+1] - 2*u[ix, it+1] + u[ix-1, it+1]) + + dt**2/dx**2 * (u[ix+1, it+1] - 2*u[ix, it+1] + u[ix-1, it+1]) - u[ix, it]) """) + ref_knl = lp.set_options(ref_knl, write_cl=True) + + # FIXME: Handle priorities in map_domain + knl_for_transform = ref_knl ref_knl = lp.prioritize_loops(ref_knl, "it, ix") - ref_knl = lp.set_options(ref_knl, write_cl=True) nx = 43 u = np.zeros((nx, 200)) x = np.linspace(-1, 1, nx) dx = x[1] - x[0] u[:, 0] = u[:, 1] = np.exp(-100*x**2) + import islpy as isl + if 1: + m = isl.BasicMap( + "[nx,nt] -> {[ix, it] -> [tx, tt, tparity, itt, itx]: " + "16*(tx - tt + tparity) + itx - itt = ix - it and " + "16*(tx + tt) + itt + itx = ix + it and " + "0<=tparity<2 and 0 <= itx - itt < 16 and 0 <= 
itt+itx < 16}") + knl = lp.map_domain(knl_for_transform, m) + knl = lp.prioritize_loops(knl, "tt,tparity,tx,itt,itx") + else: + # This is more like what split_iname does, but it is *not* + # a correct tiling for the stencil operator. + m = isl.BasicMap( + "[nx,nt] -> {[ix, it] -> [tx, tt, itt, itx]: " + "16*tx + itx = ix and " + "16*tt + itt = it and " + "0 <= itx < 16 and 0 <= itt< 16}") + + knl = lp.map_domain(knl_for_transform, m) + knl = lp.prioritize_loops(knl, "tt,tx,itt,itx") + u_dev = cl.array.to_device(queue, u) - ref_knl(queue, u=u_dev, dx=dx, dt=dx) + knl(queue, u=u_dev, dx=dx, dt=dx) u = u_dev.get() import matplotlib.pyplot as plt From 70ee84dfda8139f8826aada1d49f4a6c9a0d7f24 Mon Sep 17 00:00:00 2001 From: "[6~" Date: Wed, 13 Nov 2019 18:07:41 -0600 Subject: [PATCH 003/460] Make placeholder error message Py2-compatible --- loopy/transform/iname.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/loopy/transform/iname.py b/loopy/transform/iname.py index e627604c1..6236ace72 100644 --- a/loopy/transform/iname.py +++ b/loopy/transform/iname.py @@ -1781,7 +1781,7 @@ def map_reduction(self, expr, expn_state): expn_state.instruction)): # FIXME if len(overlap) != len(self.old_inames): - raise LoopyError(...) 
+ raise LoopyError("...") raise NotImplementedError("reductions") new_inames = list(expr.inames) From 3c4c1ff2e46a8feecf7ac1053cb2a90fc7aebb25 Mon Sep 17 00:00:00 2001 From: "[6~" Date: Wed, 13 Nov 2019 18:07:59 -0600 Subject: [PATCH 004/460] test_diamond_tiling: Remove split_iname analog --- test/test_transform.py | 26 +++++++------------------- 1 file changed, 7 insertions(+), 19 deletions(-) diff --git a/test/test_transform.py b/test/test_transform.py index 9b184f4b0..6c15b91ec 100644 --- a/test/test_transform.py +++ b/test/test_transform.py @@ -595,25 +595,13 @@ def test_diamond_tiling(ctx_factory): u[:, 0] = u[:, 1] = np.exp(-100*x**2) import islpy as isl - if 1: - m = isl.BasicMap( - "[nx,nt] -> {[ix, it] -> [tx, tt, tparity, itt, itx]: " - "16*(tx - tt + tparity) + itx - itt = ix - it and " - "16*(tx + tt) + itt + itx = ix + it and " - "0<=tparity<2 and 0 <= itx - itt < 16 and 0 <= itt+itx < 16}") - knl = lp.map_domain(knl_for_transform, m) - knl = lp.prioritize_loops(knl, "tt,tparity,tx,itt,itx") - else: - # This is more like what split_iname does, but it is *not* - # a correct tiling for the stencil operator. 
- m = isl.BasicMap( - "[nx,nt] -> {[ix, it] -> [tx, tt, itt, itx]: " - "16*tx + itx = ix and " - "16*tt + itt = it and " - "0 <= itx < 16 and 0 <= itt< 16}") - - knl = lp.map_domain(knl_for_transform, m) - knl = lp.prioritize_loops(knl, "tt,tx,itt,itx") + m = isl.BasicMap( + "[nx,nt] -> {[ix, it] -> [tx, tt, tparity, itt, itx]: " + "16*(tx - tt + tparity) + itx - itt = ix - it and " + "16*(tx + tt) + itt + itx = ix + it and " + "0<=tparity<2 and 0 <= itx - itt < 16 and 0 <= itt+itx < 16}") + knl = lp.map_domain(knl_for_transform, m) + knl = lp.prioritize_loops(knl, "tt,tparity,tx,itt,itx") u_dev = cl.array.to_device(queue, u) knl(queue, u=u_dev, dx=dx, dt=dx) From b61dbcf7aeb705e7071d2129fc20bb40facbf076 Mon Sep 17 00:00:00 2001 From: "[6~" Date: Wed, 13 Nov 2019 18:08:47 -0600 Subject: [PATCH 005/460] Fix diamond tile mapping --- test/test_transform.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/test_transform.py b/test/test_transform.py index 6c15b91ec..9cb1af4ab 100644 --- a/test/test_transform.py +++ b/test/test_transform.py @@ -597,8 +597,8 @@ def test_diamond_tiling(ctx_factory): import islpy as isl m = isl.BasicMap( "[nx,nt] -> {[ix, it] -> [tx, tt, tparity, itt, itx]: " - "16*(tx - tt + tparity) + itx - itt = ix - it and " - "16*(tx + tt) + itt + itx = ix + it and " + "16*(tx - tt) + itx - itt = ix - it and " + "16*(tx + tt + tparity) + itt + itx = ix + it and " "0<=tparity<2 and 0 <= itx - itt < 16 and 0 <= itt+itx < 16}") knl = lp.map_domain(knl_for_transform, m) knl = lp.prioritize_loops(knl, "tt,tparity,tx,itt,itx") From c6610312666c6f8bb38acbb439bf7fc1199aa529 Mon Sep 17 00:00:00 2001 From: "[6~" Date: Thu, 14 Nov 2019 16:37:20 -0600 Subject: [PATCH 006/460] Make test_diamond_tiling an actual test --- test/test_transform.py | 38 ++++++++++++++++++++++++-------------- 1 file changed, 24 insertions(+), 14 deletions(-) diff --git a/test/test_transform.py b/test/test_transform.py index 9cb1af4ab..d8030358b 100644 --- 
a/test/test_transform.py +++ b/test/test_transform.py @@ -569,7 +569,7 @@ def test_nested_substs_in_insns(ctx_factory): lp.auto_test_vs_ref(ref_knl, ctx, knl) -def test_diamond_tiling(ctx_factory): +def test_diamond_tiling(ctx_factory, interactive=False): ctx = ctx_factory() queue = cl.CommandQueue(ctx) @@ -581,19 +581,12 @@ def test_diamond_tiling(ctx_factory): + dt**2/dx**2 * (u[ix+1, it+1] - 2*u[ix, it+1] + u[ix-1, it+1]) - u[ix, it]) """) - ref_knl = lp.set_options(ref_knl, write_cl=True) # FIXME: Handle priorities in map_domain knl_for_transform = ref_knl ref_knl = lp.prioritize_loops(ref_knl, "it, ix") - nx = 43 - u = np.zeros((nx, 200)) - x = np.linspace(-1, 1, nx) - dx = x[1] - x[0] - u[:, 0] = u[:, 1] = np.exp(-100*x**2) - import islpy as isl m = isl.BasicMap( "[nx,nt] -> {[ix, it] -> [tx, tt, tparity, itt, itx]: " @@ -603,13 +596,30 @@ def test_diamond_tiling(ctx_factory): knl = lp.map_domain(knl_for_transform, m) knl = lp.prioritize_loops(knl, "tt,tparity,tx,itt,itx") - u_dev = cl.array.to_device(queue, u) - knl(queue, u=u_dev, dx=dx, dt=dx) + if interactive: + nx = 43 + u = np.zeros((nx, 200)) + x = np.linspace(-1, 1, nx) + dx = x[1] - x[0] + u[:, 0] = u[:, 1] = np.exp(-100*x**2) + + u_dev = cl.array.to_device(queue, u) + knl(queue, u=u_dev, dx=dx, dt=dx) - u = u_dev.get() - import matplotlib.pyplot as plt - plt.imshow(u.T) - plt.show() + u = u_dev.get() + import matplotlib.pyplot as plt + plt.imshow(u.T) + plt.show() + else: + types = {"dt,dx,u": np.float64} + knl = lp.add_and_infer_dtypes(knl, types) + ref_knl = lp.add_and_infer_dtypes(ref_knl, types) + + lp.auto_test_vs_ref(ref_knl, ctx, knl, + parameters={ + "nx": 200, "nt": 300, + "dx": 1, "dt": 1 + }) if __name__ == "__main__": From e1ae8cf8080f93f8cb1252a194414c2cf60f1f78 Mon Sep 17 00:00:00 2001 From: "[6~" Date: Thu, 14 Nov 2019 16:37:43 -0600 Subject: [PATCH 007/460] Fix handling of reductions in map_domain --- loopy/transform/iname.py | 32 +++++++++++++++++++++++--------- 1 file changed, 23 
insertions(+), 9 deletions(-) diff --git a/loopy/transform/iname.py b/loopy/transform/iname.py index 6236ace72..c49f26137 100644 --- a/loopy/transform/iname.py +++ b/loopy/transform/iname.py @@ -1773,20 +1773,34 @@ def __init__(self, rule_mapping_context, within, new_inames, substitutions): self.substitutions = substitutions def map_reduction(self, expr, expn_state): - overlap = frozenset(expr.inames) & self.old_inames - if (overlap - and self.split_iname not in expn_state.arg_context + red_overlap = frozenset(expr.inames) & self.old_inames + arg_ctx_overlap = frozenset(expn_state.arg_context) & self.old_inames + if (red_overlap and self.within( expn_state.kernel, expn_state.instruction)): - # FIXME - if len(overlap) != len(self.old_inames): - raise LoopyError("...") + if len(red_overlap) != len(self.old_inames): + raise LoopyError("reduction '%s' involves a part " + "of the map domain inames. Reductions must " + "either involve all or none of the map domain " + "inames." % str(expr)) + + if arg_ctx_overlap: + if arg_ctx_overlap == red_overlap: + # All variables are shadowed by context, that's OK. + return super(_MapDomainMapper, self).map_reduction( + expr, expn_state) + else: + raise LoopyError("reduction '%s' has" + "some of the reduction variables affected " + "by the map_domain shadowed by context. " + "Either all or none must be shadowed." 
+ % str(expr)) - raise NotImplementedError("reductions") new_inames = list(expr.inames) - new_inames.remove(self.split_iname) - new_inames.extend([self.outer_iname, self.inner_iname]) + for old_iname in self.old_inames: + new_inames.remove(old_iname) + new_inames.extend(self.new_inames) from loopy.symbolic import Reduction return Reduction(expr.operation, tuple(new_inames), From ad45361773c7df95760504b8d5a9b375f9564b76 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Mon, 13 Apr 2020 22:12:19 -0500 Subject: [PATCH 008/460] improved encapsulation approach for LexSchedule and map creation; improved algorithm for LexSchedule creation so that integer lex dims are not incremented unnecessarily when a block of code didn't contain any statements; fixed minor bugs in LexSchedule/item str methods; extracted only code related to LexSchedule/map creation (removed dependency stuff, for example) to shrink scope of this MR --- .../checker/lexicographic_order_map.py | 159 ++++++ loopy/schedule/checker/schedule.py | 493 ++++++++++++++++++ loopy/schedule/checker/utils.py | 335 ++++++++++++ 3 files changed, 987 insertions(+) create mode 100644 loopy/schedule/checker/lexicographic_order_map.py create mode 100644 loopy/schedule/checker/schedule.py create mode 100644 loopy/schedule/checker/utils.py diff --git a/loopy/schedule/checker/lexicographic_order_map.py b/loopy/schedule/checker/lexicographic_order_map.py new file mode 100644 index 000000000..2e063e7d7 --- /dev/null +++ b/loopy/schedule/checker/lexicographic_order_map.py @@ -0,0 +1,159 @@ +__copyright__ = "Copyright (C) 2019 James Stevens" + +__license__ = """ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software 
is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +""" + +import islpy as isl + + +def get_statement_ordering_map( + sched_map_before, sched_map_after, lex_map, before_marker="'"): + """Return a mapping that maps each statement instance to + all statement instances occuring later. + + :arg sched_map_before: An :class:`islpy.Map` representing instruction + instance order for the dependee as a mapping from each statement + instance to a point in the lexicographic ordering. + + :arg sched_map_after: An :class:`islpy.Map` representing instruction + instance order for the depender as a mapping from each statement + instance to a point in the lexicographic ordering. + + :arg lex_map: An :class:`islpy.Map` representing a lexicographic + ordering as a mapping from each point in lexicographic time + to every point that occurs later in lexicographic time. E.g.:: + + {[i0', i1', i2', ...] -> [i0, i1, i2, ...] : + i0' < i0 or (i0' = i0 and i1' < i1) + or (i0' = i0 and i1' = i1 and i2' < i2) ...} + + :returns: An :class:`islpy.Map` representing the lex schedule as + a mapping from each statement instance to all statement instances + occuring later. I.e., we compose B -> L -> A^-1, where B + is sched_map_before, A is sched_map_after, and L is the + lexicographic ordering map. 
+ + """ + + sio = sched_map_before.apply_range( + lex_map).apply_range(sched_map_after.reverse()) + # append marker to in names + for i in range(sio.dim(isl.dim_type.in_)): + sio = sio.set_dim_name(isl.dim_type.in_, i, sio.get_dim_name( + isl.dim_type.in_, i)+before_marker) + return sio + + +def get_lex_order_constraint(islvars, before_names, after_names): + """Return a constraint represented as an :class:`islpy.Set` + defining a 'happens before' relationship in a lexicographic + ordering. + + :arg islvars: A dictionary from variable names to :class:`islpy.PwAff` + instances that represent each of the variables + (islvars may be produced by `islpy.make_zero_and_vars`). The key + '0' is also include and represents a :class:`islpy.PwAff` zero constant. + This dictionary defines the space to be used for the set. + + :arg before_names: A list of :class:`str` variable names representing + the lexicographic space dimensions for a point in lexicographic + time that occurs before. (see example below) + + :arg after_names: A list of :class:`str` variable names representing + the lexicographic space dimensions for a point in lexicographic + time that occurs after. (see example below) + + :returns: An :class:`islpy.Set` representing a constraint that enforces a + lexicographic ordering. 
E.g., if ``before_names = [i0', i1', i2']`` and + ``after_names = [i0, i1, i2]``, return the set:: + + {[i0', i1', i2', i0, i1, i2] : + i0' < i0 or (i0' = i0 and i1' < i1) + or (i0' = i0 and i1' = i1 and i2' < i2)} + + """ + + lex_order_constraint = islvars[before_names[0]].lt_set(islvars[after_names[0]]) + for i in range(1, len(before_names)): + lex_order_constraint_conj = islvars[before_names[i]].lt_set( + islvars[after_names[i]]) + for j in range(i): + lex_order_constraint_conj = lex_order_constraint_conj & \ + islvars[before_names[j]].eq_set(islvars[after_names[j]]) + lex_order_constraint = lex_order_constraint | lex_order_constraint_conj + return lex_order_constraint + + +def create_lex_order_map( + n_dims, + before_names=None, + after_names=None, + ): + """Return a mapping that maps each point in a lexicographic + ordering to every point that occurs later in lexicographic + time. + + :arg n_dims: An :class:`int` representing the number of dimensions + in the lexicographic ordering. + + :arg before_names: A list of :class:`str` variable names representing + the lexicographic space dimensions for a point in lexicographic + time that occurs before. (see example below) + + :arg after_names: A list of :class:`str` variable names representing + the lexicographic space dimensions for a point in lexicographic + time that occurs after. (see example below) + + :returns: An :class:`islpy.Map` representing a lexicographic + ordering as a mapping from each point in lexicographic time + to every point that occurs later in lexicographic time. 
+ E.g., if ``before_names = [i0', i1', i2']`` and + ``after_names = [i0, i1, i2]``, return the map:: + + {[i0', i1', i2'] -> [i0, i1, i2] : + i0' < i0 or (i0' = i0 and i1' < i1) + or (i0' = i0 and i1' = i1 and i2' < i2)} + + """ + + if before_names is None: + before_names = ["i%s" % (i) for i in range(n_dims)] + if after_names is None: + from loopy.schedule.checker.utils import ( + append_marker_to_strings, + ) + after_names = append_marker_to_strings(before_names, marker="_") + + assert len(before_names) == len(after_names) == n_dims + dim_type = isl.dim_type + + islvars = isl.make_zero_and_vars( + before_names+after_names, + []) + + lex_order_constraint = get_lex_order_constraint( + islvars, before_names, after_names) + + lex_map = isl.Map.from_domain(lex_order_constraint) + lex_map = lex_map.move_dims( + dim_type.out, 0, dim_type.in_, + len(before_names), len(after_names)) + + return lex_map diff --git a/loopy/schedule/checker/schedule.py b/loopy/schedule/checker/schedule.py new file mode 100644 index 000000000..c395863ef --- /dev/null +++ b/loopy/schedule/checker/schedule.py @@ -0,0 +1,493 @@ +__copyright__ = "Copyright (C) 2019 James Stevens" + +__license__ = """ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +""" + +import islpy as isl + + +class LexScheduleStatement(object): + """A representation of a :mod:`loopy` statement. + + .. attribute:: insn_id + + A :class:`str` specifying the instruction id. + + .. attribute:: int_id + + A :class:`int` uniquely identifying the instruction. + + .. attribute:: within_inames + + A :class:`list` of :class:`str` inames identifying the loops within + which this statement will be executed. + + """ + + def __init__( + self, + insn_id, # loopy insn id + int_id=None, # sid int (statement id within LexSchedule) + within_inames=None, # [string, ] + ): + self.insn_id = insn_id # string + self.int_id = int_id + self.within_inames = within_inames + + def __eq__(self, other): + return ( + self.insn_id == other.insn_id + and self.int_id == other.int_id + and self.within_inames == other.within_inames + ) + + def __hash__(self): + return hash(repr(self)) + + def update_persistent_hash(self, key_hash, key_builder): + """Custom hash computation function for use with + :class:`pytools.persistent_dict.PersistentDict`. + """ + + key_builder.rec(key_hash, self.insn_id) + key_builder.rec(key_hash, self.int_id) + key_builder.rec(key_hash, self.within_inames) + + def __str__(self): + if self.int_id is not None: + int_id = ":%d" % (self.int_id) + else: + int_id = "" + if self.within_inames: + within_inames = " {%s}" % (",".join(self.within_inames)) + else: + within_inames = "" + return "%s%s%s" % ( + self.insn_id, int_id, within_inames) + + +class LexScheduleStatementInstance(object): + """A representation of a :mod:`loopy` statement instance. + + .. attribute:: stmt + + A :class:`LexScheduleStatement`. + + .. 
attribute:: lex_pt + + A list of :class:`int` or as :class:`str` :mod:`loopy` inames representing + a point or set of points in a lexicographic ordering. + + """ + + def __init__( + self, + stmt, # a LexScheduleStatement + lex_pt, # [string/int, ] + ): + self.stmt = stmt + self.lex_pt = lex_pt + + def __str__(self): + return "{%s, %s}" % (self.stmt, self.lex_pt) + + +class LexSchedule(object): + """A program ordering represented as a mapping from statement + instances to points in a lexicographic ordering. + + .. attribute:: stmt_instance_before + + A :class:`LexScheduleStatementInstance` describing the dependee + statement's order relative to the depender statment by mapping + a statement to a point or set of points in a lexicographic + ordering. Points in lexicographic ordering are represented as + a list of :class:`int` or as :class:`str` :mod:`loopy` inames. + + .. attribute:: stmt_instance_after + + A :class:`LexScheduleStatementInstance` describing the depender + statement's order relative to the dependee statment by mapping + a statement to a point or set of points in a lexicographic + ordering. Points in lexicographic ordering are represented as + a list of :class:`int` or as :class:`str` :mod:`loopy` inames. + + .. attribute:: statement_var_name + + A :class:`str` specifying the name of the isl variable used + to represent the unique :class:`int` statement id. + + .. attribute:: lex_var_prefix + + A :class:`str` specifying the prefix to be used for the variables + representing the dimensions in the lexicographic ordering. E.g., + a prefix of "lex" might yield variables "lex0", "lex1", "lex2". + + """ + + statement_var_name = "statement" + lex_var_prefix = "l" + + def __init__( + self, + linearization_items_ordered, + before_insn_id, + after_insn_id, + prohibited_var_names=[], + loops_to_ignore=set(), + ): + """ + :arg linearization_items_ordered: A list of :class:`ScheduleItem` whose + order will be described by this :class:`LexSchedule`. 
+ + :arg before_insn_id: A :class:`str` instruction id specifying + the dependee in this pair of instructions. + + :arg after_insn_id: A :class:`str` instruction id specifying + the depender in this pair of instructions. + + :arg prohibited_var_names: A list of :class:`str` variable names + that may not be used as the statement variable name (e.g., + because they are already being used as inames). + + """ + + # LexScheduleStatements + self.stmt_instance_before = None + self.stmt_instance_after = None + + # make sure we don't have an iname name conflict + # TODO use loopy's existing tool for ensuring unique var names + assert not any( + iname == self.statement_var_name for iname in prohibited_var_names) + + from loopy.schedule import (EnterLoop, LeaveLoop, Barrier, RunInstruction) + + # go through linearization_items_ordered and generate self.lex_schedule + + # keep track of the next point in our lexicographic ordering + # initially this as a 1-d point with value 0 + next_insn_lex_pt = [0] + stmt_since_last_block_at_tier = [False] + next_sid = 0 + stmt_added_since_last_EnterLoop = False + stmt_added_since_last_LeaveLoop = False + #stmt_added_since_last_new_block = False # blocks start at open/close loop + for linearization_item in linearization_items_ordered: + if isinstance(linearization_item, EnterLoop): + iname = linearization_item.iname + if iname in loops_to_ignore: + continue + + # We could always increment next_insn_lex_pt[-1] here since this new + # section of code comes after the previous section (statements + # since last opened/closed loop), but if we have not added any statements + # within this block yet, we don't have to + # (effectively ignoring that section of code). 
+ if stmt_since_last_block_at_tier[-1]: + next_insn_lex_pt[-1] = next_insn_lex_pt[-1]+1 + stmt_since_last_block_at_tier[-1] = False + + # upon entering a loop, we enter a new (deeper) tier, + # add one lex dimension for the loop variable, + # add second lex dim to enumerate code blocks within new loop, and + # append a dim to stmt_since_last_block_at_tier to represent new tier + next_insn_lex_pt.append(iname) + next_insn_lex_pt.append(0) + stmt_since_last_block_at_tier.append(False) + elif isinstance(linearization_item, LeaveLoop): + if linearization_item.iname in loops_to_ignore: + continue + # upon leaving a loop, + # pop lex dimension for enumerating code blocks within this loop, and + # pop lex dimension for the loop variable, and + # increment lex dim val enumerating items in current code block + next_insn_lex_pt.pop() + next_insn_lex_pt.pop() + + # We could always increment next_insn_lex_pt[-1] here since this new + # block of code comes after the previous block (all statements + # since last opened/closed loop), but if we have not added any statements + # within this block yet, we don't have to + # (effectively ignoring that section of code). + stmt_since_last_block_at_tier.pop() + if stmt_since_last_block_at_tier[-1]: + next_insn_lex_pt[-1] = next_insn_lex_pt[-1]+1 + stmt_since_last_block_at_tier[-1] = False + elif isinstance(linearization_item, (RunInstruction, Barrier)): + from loopy.schedule.checker.utils import ( + _get_insn_id_from_linearization_item, + ) + lp_insn_id = _get_insn_id_from_linearization_item(linearization_item) + if lp_insn_id is None: + # TODO make sure it's okay to ignore barriers without id + # (because they'll never be part of a dependency?) + # matmul example has barrier that fails this assertion... 
+ # assert linearization_item.originating_insn_id is not None + continue + + # only process before/after insns, otherwise ignore + if lp_insn_id == before_insn_id and lp_insn_id == after_insn_id: + # add before sched item + self.stmt_instance_before = LexScheduleStatementInstance( + LexScheduleStatement( + insn_id=lp_insn_id, + int_id=next_sid, # int representing insn + ), + next_insn_lex_pt[:]) + # add after sched item + self.stmt_instance_after = LexScheduleStatementInstance( + LexScheduleStatement( + insn_id=lp_insn_id, + int_id=next_sid, # int representing insn + ), + next_insn_lex_pt[:]) + + # increment lex dim val enumerating items in current code block + next_insn_lex_pt[-1] = next_insn_lex_pt[-1] + 1 + next_sid += 1 + + # all current (nested) blocks now contain a statement + stmt_since_last_block_at_tier = [True]*len(stmt_since_last_block_at_tier) + elif lp_insn_id == before_insn_id: + # add before sched item + self.stmt_instance_before = LexScheduleStatementInstance( + LexScheduleStatement( + insn_id=lp_insn_id, + int_id=next_sid, # int representing insn + ), + next_insn_lex_pt[:]) + + # increment lex dim val enumerating items in current code block + next_insn_lex_pt[-1] = next_insn_lex_pt[-1] + 1 + next_sid += 1 + + # all current (nested) blocks now contain a statement + stmt_since_last_block_at_tier = [True]*len(stmt_since_last_block_at_tier) + elif lp_insn_id == after_insn_id: + # add after sched item + self.stmt_instance_after = LexScheduleStatementInstance( + LexScheduleStatement( + insn_id=lp_insn_id, + int_id=next_sid, # int representing insn + ), + next_insn_lex_pt[:]) + + # increment lex dim val enumerating items in current code block + next_insn_lex_pt[-1] = next_insn_lex_pt[-1] + 1 + next_sid += 1 + + # all current (nested) blocks now contain a statement + stmt_since_last_block_at_tier = [True]*len(stmt_since_last_block_at_tier) + else: + pass + # to save time, stop when we've created both statements + if self.stmt_instance_before and 
self.stmt_instance_after: + break + + # at this point, lex_schedule may contain lex points missing dimensions, + # the values in these missing dims should be zero, so add them + self.pad_lex_pts_with_zeros() + + def loopy_insn_id_to_lex_sched_id(self): + """Return a dictionary mapping insn_id to int_id, where ``insn_id`` and + ``int_id`` refer to the ``insn_id`` and ``int_id`` attributes of + :class:`LexScheduleStatement`. + """ + return { + self.stmt_instance_before.stmt.insn_id: + self.stmt_instance_before.stmt.int_id, + self.stmt_instance_after.stmt.insn_id: + self.stmt_instance_after.stmt.int_id, + } + + def max_lex_dims(self): + return max([ + len(self.stmt_instance_before.lex_pt), + len(self.stmt_instance_after.lex_pt)]) + + def pad_lex_pts_with_zeros(self): + """Find the maximum number of lexicographic dimensions represented + in the lexicographic ordering, and if any + :class:`LexScheduleStatement` maps to a point in lexicographic + time with fewer dimensions, add a zero for each of the missing + dimensions. + """ + + max_lex_dim = self.max_lex_dims() + self.stmt_instance_before = LexScheduleStatementInstance( + self.stmt_instance_before.stmt, + self.stmt_instance_before.lex_pt[:] + [0]*( + max_lex_dim-len(self.stmt_instance_before.lex_pt)) + ) + self.stmt_instance_after = LexScheduleStatementInstance( + self.stmt_instance_after.stmt, + self.stmt_instance_after.lex_pt[:] + [0]*( + max_lex_dim-len(self.stmt_instance_after.lex_pt)) + ) + + def create_isl_maps( + self, + dom_before, + dom_after, + dom_inames_ordered_before=None, + dom_inames_ordered_after=None, + ): + """Create two isl maps representing lex schedule as two mappings + from statement instances to lexicographic time, one for + the dependee and one for the depender. + + :arg dom_before: A :class:`islpy.BasicSet` representing the + domain for the dependee statement. + + :arg dom_after: A :class:`islpy.BasicSet` representing the + domain for the dependee statement. 
+ + :arg dom_inames_ordered_before: A list of :class:`str` + representing the union of inames used in instances of the + dependee statement. ``statement_var_name`` and + ``dom_inames_ordered_before`` are the names of the dims of + the space of the ISL map domain for the dependee. + + :arg dom_inames_ordered_after: A list of :class:`str` + representing the union of inames used in instances of the + depender statement. ``statement_var_name`` and + ``dom_inames_ordered_after`` are the names of the dims of + the space of the ISL map domain for the depender. + + :returns: A two-tuple containing two :class:`islpy.Map`s + representing the schedule as two mappings + from statement instances to lexicographic time, one for + the dependee and one for the depender. + + """ + + from loopy.schedule.checker.utils import ( + create_symbolic_isl_map_from_tuples, + add_dims_to_isl_set + ) + + from loopy.schedule.checker.utils import ( + list_var_names_in_isl_sets, + ) + if dom_inames_ordered_before is None: + dom_inames_ordered_before = list_var_names_in_isl_sets( + [dom_before]) + if dom_inames_ordered_after is None: + dom_inames_ordered_after = list_var_names_in_isl_sets( + [dom_after]) + + # create an isl space + # {('statement', used in >=1 statement domain>) -> + # (lexicographic ordering dims)} + from loopy.schedule.checker.utils import ( + get_isl_space + ) + params_sched = [] + out_names_sched = self.get_lex_var_names() + + in_names_sched_before = [ + self.statement_var_name] + dom_inames_ordered_before[:] + sched_space_before = get_isl_space( + params_sched, in_names_sched_before, out_names_sched) + in_names_sched_after = [ + self.statement_var_name] + dom_inames_ordered_after[:] + sched_space_after = get_isl_space( + params_sched, in_names_sched_after, out_names_sched) + + # Insert 'statement' dim into domain so that its space allows for + # intersection with sched map later + doms_to_intersect_before = [ + add_dims_to_isl_set( + dom_before, isl.dim_type.set, + 
[self.statement_var_name], 0), + ] + doms_to_intersect_after = [ + add_dims_to_isl_set( + dom_after, isl.dim_type.set, + [self.statement_var_name], 0), + ] + + # Each isl map representing the schedule maps + # statement instances -> lex time + + # Right now, statement tuples consist of single int. + # Add all inames from domains to map domain tuples. + + # create isl map + return ( + create_symbolic_isl_map_from_tuples( + zip( + [( + (self.stmt_instance_before.stmt.int_id,) + + tuple(dom_inames_ordered_before), + self.stmt_instance_before.lex_pt + )], + doms_to_intersect_before + ), + sched_space_before, self.statement_var_name), + create_symbolic_isl_map_from_tuples( + zip( + [( + (self.stmt_instance_after.stmt.int_id,) + + tuple(dom_inames_ordered_after), + self.stmt_instance_after.lex_pt)], + doms_to_intersect_after + ), + sched_space_after, self.statement_var_name) + ) + + def get_lex_var_names(self): + return [self.lex_var_prefix+str(i) + for i in range(self.max_lex_dims())] + + def get_lex_order_map_for_sched_space(self): + """Return an :class:`islpy.BasicMap` that maps each point in a + lexicographic ordering to every point that is + lexocigraphically greater. 
+ """ + + from loopy.schedule.checker.lexicographic_order_map import ( + create_lex_order_map, + ) + n_dims = self.max_lex_dims() + return create_lex_order_map( + n_dims, before_names=self.get_lex_var_names()) + + def __eq__(self, other): + return ( + self.stmt_instance_before == other.stmt_instance_before + and self.stmt_instance_after == other.stmt_instance_after) + + def __str__(self): + sched_str = "Before: {\n" + domain_elem = "[%s=%s,]" % ( + self.statement_var_name, + self.stmt_instance_before.stmt.int_id) + sched_str += "%s -> %s;\n" % (domain_elem, self.stmt_instance_before.lex_pt) + sched_str += "}\n" + + sched_str += "After: {\n" + domain_elem = "[%s=%s,]" % ( + self.statement_var_name, + self.stmt_instance_after.stmt.int_id) + sched_str += "%s -> %s;\n" % (domain_elem, self.stmt_instance_after.lex_pt) + sched_str += "}" + return sched_str diff --git a/loopy/schedule/checker/utils.py b/loopy/schedule/checker/utils.py new file mode 100644 index 000000000..8757406b7 --- /dev/null +++ b/loopy/schedule/checker/utils.py @@ -0,0 +1,335 @@ +__copyright__ = "Copyright (C) 2019 James Stevens" + +__license__ = """ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +""" + +import islpy as isl + + +def prettier_map_string(isl_map): + return str(isl_map + ).replace("{ ", "{\n").replace(" }", "\n}").replace("; ", ";\n") + + +def get_islvars_from_space(space): + param_names = space.get_var_names(isl.dim_type.param) + in_names = space.get_var_names(isl.dim_type.in_) + out_names = space.get_var_names(isl.dim_type.out) + return isl.make_zero_and_vars(in_names+out_names, param_names) + + +def add_dims_to_isl_set(isl_set, dim_type, names, new_pose_start): + new_set = isl_set.insert_dims( + dim_type, new_pose_start, len(names) + ).set_dim_name(dim_type, new_pose_start, names[0]) + for i, name in enumerate(names[1:]): + new_set = new_set.set_dim_name(dim_type, new_pose_start+1+i, name) + return new_set + + +def reorder_dims_by_name( + isl_set, dim_type, desired_dims_ordered, + add_missing=False, new_names_are_permutation_only=False): + """Return an isl_set with the dimensions in the specified order. + + :arg isl_set: A :class:`islpy.Set` whose dimensions are + to be reordered. + + :arg dim_type: A :class:`islpy.dim_type`, i.e., an :class:`int`, + specifying the dimension to be reordered. + + :arg desired_dims_ordered: A :class:`list` of :class:`str` elements + representing the desired dimensions order by dimension name. + + :arg add_missing: A :class:`bool` specifying whether to insert + dimensions (by name) found in `desired_dims_ordered` that are not + present in `isl_set`. + + :arg new_names_are_permutation_only: A :class:`bool` indicating that + `desired_dims_ordered` contains the same names as the specified + dimensions in `isl_set`, and does not, e.g., contain additional + dimension names not found in `isl_set`. 
If set to True, and these + two sets of names do not match, an error is produced. + + :returns: An :class:`islpy.Set` matching `isl_set` with the + dimension order matching `desired_dims_ordered`, optionally + including additional dimensions present in `desred_dims_ordered` + that are not present in `isl_set`. + + """ + + assert set(isl_set.get_var_names(dim_type)).issubset(desired_dims_ordered) + assert dim_type != isl.dim_type.param + + if new_names_are_permutation_only and ( + set(isl_set.get_var_names(dim_type)) + != set(desired_dims_ordered)): + raise ValueError( + "Var name sets must match with new_names_are_permutation_only=True. " + "isl vars: %s, desired dims: %s" + % (isl_set.get_var_names(dim_type), desired_dims_ordered)) + + other_dim_type = isl.dim_type.param + other_dim_len = len(isl_set.get_var_names(other_dim_type)) + + new_set = isl_set.copy() + for desired_pose, name in enumerate(desired_dims_ordered): + # if iname doesn't exist in set, add dim: + if name not in new_set.get_var_names(dim_type): + if add_missing: + # insert missing dim in correct location + new_set = new_set.insert_dims( + dim_type, desired_pose, 1 + ).set_dim_name( + dim_type, desired_pose, name) + else: # iname exists in set + current_pose = new_set.find_dim_by_name(dim_type, name) + if current_pose != desired_pose: + # move_dims(dst_type, dst_pose, src_type, src_pose, n) + + # first move to other dim because isl is stupid + new_set = new_set.move_dims( + other_dim_type, other_dim_len, dim_type, current_pose, 1) + + # now move it where we actually want it + new_set = new_set.move_dims( + dim_type, desired_pose, other_dim_type, other_dim_len, 1) + + return new_set + + +def align_isl_maps_by_var_names(input_map, target_map): + + # align params + aligned_input_map = input_map.align_params(target_map.space) + + # align in_ dims + target_map_in_names = target_map.space.get_var_names(isl.dim_type.in_) + aligned_input_map = reorder_dims_by_name( + aligned_input_map, + isl.dim_type.in_, 
+ target_map_in_names, + add_missing=False, + new_names_are_permutation_only=True, + ) + + # align out dims + target_map_out_names = target_map.space.get_var_names(isl.dim_type.out) + aligned_input_map = reorder_dims_by_name( + aligned_input_map, + isl.dim_type.out, + target_map_out_names, + add_missing=False, + new_names_are_permutation_only=True, + ) + + return aligned_input_map + + +def append_marker_to_strings(strings, marker="'"): + if not isinstance(strings, list): + raise ValueError("append_marker_to_strings did not receive a list") + else: + return [s+marker for s in strings] + + +def _union_of_isl_sets_or_maps(set_list): + union = set_list[0] + for s in set_list[1:]: + union = union.union(s) + return union + + +def list_var_names_in_isl_sets( + isl_sets, + set_dim=isl.dim_type.set): + inames = set() + for isl_set in isl_sets: + inames.update(isl_set.get_var_names(set_dim)) + return list(inames) + + +def create_symbolic_isl_map_from_tuples( + tuple_pairs_with_domains, + space, + statement_var_name, + ): + """Return an :class:`islpy.Map` constructed using the provided space, + mapping input->output tuples provided in `tuple_pairs_with_domains`, + with each set of tuple variables constrained by the domains provided. + + :arg tuple_pairs_with_domains: A :class:`list` with each element being + a tuple of the form `((tup_in, tup_out), domain)`. + `tup_in` and `tup_out` are tuples containing elements of type + :class:`int` and :class:`str` representing values for the + input and output dimensions in `space`, and `domain` is a + :class:`islpy.Set` constraining variable bounds. + + :arg space: A :class:`islpy.Space` to be used to create the map. + + :arg statement_var_name: A :class:`str` specifying the name of the + isl variable used to represent the unique :class:`int` statement id. + + :returns: A :class:`islpy.Map` constructed using the provided space + as follows. 
For each `((tup_in, tup_out), domain)` in + `tuple_pairs_with_domains`, map + `(tup_in)->(tup_out) : domain`, where `tup_in` and `tup_out` are + numeric or symbolic values assigned to the input and output + dimension variables in `space`, and `domain` specifies constraints + on these values. + + """ + # TODO allow None for domains + + dim_type = isl.dim_type + + #param_names = space.get_var_names(isl.dim_type.param) + space_out_names = space.get_var_names(dim_type.out) + space_in_names = space.get_var_names(isl.dim_type.in_) + + islvars = get_islvars_from_space(space) + + # loop through pairs and create a set that will later be converted to a map + + all_maps = [] + for (tup_in, tup_out), dom in tuple_pairs_with_domains: + + # initialize constraint with true + constraint = islvars[0].eq_set(islvars[0]) + + # set values for 'in' dimension using tuple vals + assert len(tup_in) == len(space_in_names) + for dim_name, val_in in zip(space_in_names, tup_in): + if isinstance(val_in, int): + constraint = constraint \ + & islvars[dim_name].eq_set(islvars[0]+val_in) + else: + constraint = constraint \ + & islvars[dim_name].eq_set(islvars[val_in]) + + # set values for 'out' dimension using tuple vals + assert len(tup_out) == len(space_out_names) + for dim_name, val_out in zip(space_out_names, tup_out): + if isinstance(val_out, int): + constraint = constraint \ + & islvars[dim_name].eq_set(islvars[0]+val_out) + else: + constraint = constraint \ + & islvars[dim_name].eq_set(islvars[val_out]) + + # convert set to map by moving dimensions around + map_from_set = isl.Map.from_domain(constraint) + map_from_set = map_from_set.move_dims( + dim_type.out, 0, dim_type.in_, + len(space_in_names), len(space_out_names)) + + assert space_in_names == map_from_set.get_var_names( + isl.dim_type.in_) + + # if there are any dimensions in dom that are missing from + # map_from_set, we have a problem I think? + # (assertion checks this in add_missing... 
+ dom_with_all_inames = reorder_dims_by_name( + dom, isl.dim_type.set, + space_in_names, + add_missing=True, + new_names_are_permutation_only=False, + ) + + # intersect domain with this map + all_maps.append( + map_from_set.intersect_domain(dom_with_all_inames)) + + return _union_of_isl_sets_or_maps(all_maps) + + +def set_all_isl_space_names( + isl_space, param_names=None, in_names=None, out_names=None): + """Return a copy of `isl_space` with the specified dimension names. + If no names are provided, use `p0, p1, ...` for parameters, + `i0, i1, ...`, for in_ dimensions, and `o0, o1, ...` for out + dimensions. + + """ + + new_space = isl_space.copy() + dim_type = isl.dim_type + if param_names: + for i, p in enumerate(param_names): + new_space = new_space.set_dim_name(dim_type.param, i, p) + else: + for i in range(len(isl_space.get_var_names(dim_type.param))): + new_space = new_space.set_dim_name(dim_type.param, i, "p%d" % (i)) + if in_names: + for i, p in enumerate(in_names): + new_space = new_space.set_dim_name(dim_type.in_, i, p) + else: + for i in range(len(isl_space.get_var_names(dim_type.in_))): + new_space = new_space.set_dim_name(dim_type.in_, i, "i%d" % (i)) + if out_names: + for i, p in enumerate(out_names): + new_space = new_space.set_dim_name(dim_type.out, i, p) + else: + for i in range(len(isl_space.get_var_names(dim_type.out))): + new_space = new_space.set_dim_name(dim_type.out, i, "o%d" % (i)) + return new_space + + +def get_isl_space(param_names, in_names, out_names): + """Return an :class:`islpy.Space` with the specified dimension names. 
+ """ + + space = isl.Space.alloc( + isl.DEFAULT_CONTEXT, len(param_names), len(in_names), len(out_names)) + return set_all_isl_space_names( + space, param_names=param_names, in_names=in_names, out_names=out_names) + + +def get_concurrent_inames(knl): + from loopy.kernel.data import ConcurrentTag + conc_inames = set() + non_conc_inames = set() + + all_inames = knl.all_inames() + for iname in all_inames: + if knl.iname_tags_of_type(iname, ConcurrentTag): + conc_inames.add(iname) + else: + non_conc_inames.add(iname) + + return conc_inames, all_inames-conc_inames + + +def _get_insn_id_from_linearization_item(linearization_item): + # TODO could use loopy's sched_item_to_insn_id() + from loopy.schedule import Barrier + if isinstance(linearization_item, Barrier): + return linearization_item.originating_insn_id + else: + return linearization_item.insn_id + + +def _get_EnterLoop_inames(linearization_items, knl): + from loopy.schedule import EnterLoop + loop_inames = set() + for linearization_item in linearization_items: + if isinstance(linearization_item, EnterLoop): + loop_inames.add(linearization_item.iname) + return loop_inames From 1ce81703127df9bddcfe841ee3663373a8369a7c Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Mon, 13 Apr 2020 22:14:00 -0500 Subject: [PATCH 009/460] adding updated and stripped down checker/__init__ (with aforementioned encapsulation improvements) --- loopy/schedule/checker/__init__.py | 104 +++++++++++++++++++++++++++++ 1 file changed, 104 insertions(+) create mode 100644 loopy/schedule/checker/__init__.py diff --git a/loopy/schedule/checker/__init__.py b/loopy/schedule/checker/__init__.py new file mode 100644 index 000000000..1acee56b9 --- /dev/null +++ b/loopy/schedule/checker/__init__.py @@ -0,0 +1,104 @@ +__copyright__ = "Copyright (C) 2019 James Stevens" + +__license__ = """ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the 
Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +""" + + +def get_schedule_for_statement_pair( + knl, + linearization_items, + insn_id_before, + insn_id_after, + prohibited_var_names=set(), + ): + """A set of dependencies between two statements. + + .. arg insn_id_before: An instruction identifier that is unique within + a :class:`loopy.kernel.LoopKernel`. + + .. arg insn_id_after: An instruction identifier that is unique within + a :class:`loopy.kernel.LoopKernel`. + + """ + + # We don't retrieve linearization items from knl because knl may not be + # (fully) linearized yet. This function may be called part way through the + # linearization process and receive the current (unfinished) set of + # linearization items + + # Preprocess if not already preprocessed + from loopy import preprocess_kernel + preproc_knl = preprocess_kernel(knl) + + if not prohibited_var_names: + prohibited_var_names = preproc_knl.all_inames() + + # Get EnterLoop inames tagged as concurrent so LexSchedule can ignore + # (In the future, this shouldn't be necessary because there + # won't be any inames with ConcurrentTags in EnterLoop linearization items. 
+ # Test exercising this: test_linearization_checker_with_stroud_bernstein()) + from loopy.schedule.checker.utils import ( + get_concurrent_inames, + _get_EnterLoop_inames, + ) + conc_inames, _ = get_concurrent_inames(preproc_knl) + enterloop_inames = _get_EnterLoop_inames(linearization_items, preproc_knl) + conc_loop_inames = conc_inames & enterloop_inames + if conc_loop_inames: + from warnings import warn + warn( + "get_schedule_for_statement_pair encountered EnterLoop for inames %s " + "with ConcurrentTag(s) in linearization for kernel %s. " + "Ignoring these loops." % (conc_loop_inames, preproc_knl.name)) + + # Create LexSchedule: mapping of {statement instance: lex point} + # include only instructions involved in this dependency + from loopy.schedule.checker.schedule import LexSchedule + return LexSchedule( + linearization_items, + insn_id_before, + insn_id_after, + prohibited_var_names=prohibited_var_names, + loops_to_ignore=conc_loop_inames, + ) + + +def get_isl_maps_for_LexSchedule( + lex_sched, + knl, + insn_id_before, + insn_id_after, + ): + # Get two isl maps representing the LexSchedule, + # one for the 'before' linearization item and one for 'after'; + # this requires the iname domains + + insn_before_inames = knl.id_to_insn[insn_id_before].within_inames + insn_after_inames = knl.id_to_insn[insn_id_after].within_inames + dom_before = knl.get_inames_domain(insn_before_inames) + dom_after = knl.get_inames_domain(insn_after_inames) + + isl_sched_map_before, isl_sched_map_after = \ + lex_sched.create_isl_maps( + dom_before, + dom_after, + ) + + return isl_sched_map_before, isl_sched_map_after From 387a8f12ef594cdf6d0fc221305bbc0bb9692b4a Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Mon, 13 Apr 2020 22:25:05 -0500 Subject: [PATCH 010/460] add test for LexSchedule creation and conversion of LexSchedule into isl map --- test/test_linearization_checker.py | 367 +++++++++++++++++++++++++++++ 1 file changed, 367 insertions(+) create mode 100644 
test/test_linearization_checker.py diff --git a/test/test_linearization_checker.py b/test/test_linearization_checker.py new file mode 100644 index 000000000..7e145ce1d --- /dev/null +++ b/test/test_linearization_checker.py @@ -0,0 +1,367 @@ +from __future__ import division, print_function + +__copyright__ = "Copyright (C) 2019 James Stevens" + +__license__ = """ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+""" + +import six # noqa: F401 +import sys +import numpy as np +import loopy as lp +from pyopencl.tools import ( # noqa + pytest_generate_tests_for_pyopencl + as pytest_generate_tests) +from loopy.version import LOOPY_USE_LANGUAGE_VERSION_2018_2 # noqa +import logging +from loopy.kernel import KernelState +from loopy import ( + preprocess_kernel, + get_one_linearized_kernel, +) + +logger = logging.getLogger(__name__) + +try: + import faulthandler +except ImportError: + pass +else: + faulthandler.enable() + + +def test_lexschedule_and_islmap_creation(): + import islpy as isl + from loopy.schedule.checker import ( + get_schedule_for_statement_pair, + get_isl_maps_for_LexSchedule, + ) + from loopy.schedule.checker.utils import ( + align_isl_maps_by_var_names, + ) + + # example kernel + knl = lp.make_kernel( + [ + "{[i]: 0<=itemp = b[i,k] {id=insn_a} + end + for j + a[i,j] = temp + 1 {id=insn_b,dep=insn_a} + c[i,j] = d[i,j] {id=insn_c} + end + end + for t + e[t] = f[t] {id=insn_d} + end + """, + name="example", + assumptions="pi,pj,pk,pt >= 1", + lang_version=(2018, 2) + ) + knl = lp.add_and_infer_dtypes( + knl, + {"b": np.float32, "d": np.float32, "f": np.float32}) + knl = lp.prioritize_loops(knl, "i,k") + knl = lp.prioritize_loops(knl, "i,j") + + # get a linearization + knl = preprocess_kernel(knl) + knl = get_one_linearized_kernel(knl) + linearization_items = knl.linearization + + # Create LexSchedule: mapping of {statement instance: lex point} + lex_sched_AB = get_schedule_for_statement_pair( + knl, + linearization_items, + "insn_a", + "insn_b", + ) + lex_sched_AC = get_schedule_for_statement_pair( + knl, + linearization_items, + "insn_a", + "insn_c", + ) + lex_sched_AD = get_schedule_for_statement_pair( + knl, + linearization_items, + "insn_a", + "insn_d", + ) + lex_sched_BC = get_schedule_for_statement_pair( + knl, + linearization_items, + "insn_b", + "insn_c", + ) + lex_sched_BD = get_schedule_for_statement_pair( + knl, + linearization_items, + "insn_b", + 
"insn_d", + ) + lex_sched_CD = get_schedule_for_statement_pair( + knl, + linearization_items, + "insn_c", + "insn_d", + ) + + # Relationship between insn_a and insn_b --------------------------------------- + + assert lex_sched_AB.stmt_instance_before.lex_pt == [0, 'i', 0, 'k', 0] + assert lex_sched_AB.stmt_instance_after.lex_pt == [0, 'i', 1, 'j', 0] + + # Get two isl maps representing the LexSchedule + + isl_sched_map_before, isl_sched_map_after = \ + get_isl_maps_for_LexSchedule(lex_sched_AB, knl, "insn_a", "insn_b") + + # Create expected maps, align, compare + + isl_sched_map_before_expected = isl.Map( + "[pi, pk] -> { " + "[statement = 0, i, k] -> [l0 = 0, l1 = i, l2 = 0, l3 = k, l4 = 0] : " + "0 <= i < pi and 0 <= k < pk }" + ) + isl_sched_map_before_expected = align_isl_maps_by_var_names( + isl_sched_map_before_expected, isl_sched_map_before) + + isl_sched_map_after_expected = isl.Map( + "[pi, pj] -> { " + "[statement = 1, i, j] -> [l0 = 0, l1 = i, l2 = 1, l3 = j, l4 = 0] : " + "0 <= i < pi and 0 <= j < pj }" + ) + isl_sched_map_after_expected = align_isl_maps_by_var_names( + isl_sched_map_after_expected, isl_sched_map_after) + + assert isl_sched_map_before == isl_sched_map_before_expected + assert isl_sched_map_after == isl_sched_map_after_expected + + # ------------------------------------------------------------------------------ + # Relationship between insn_a and insn_c --------------------------------------- + + assert lex_sched_AC.stmt_instance_before.lex_pt == [0, 'i', 0, 'k', 0] + assert lex_sched_AC.stmt_instance_after.lex_pt == [0, 'i', 1, 'j', 0] + + # Get two isl maps representing the LexSchedule + + isl_sched_map_before, isl_sched_map_after = \ + get_isl_maps_for_LexSchedule(lex_sched_AC, knl, "insn_a", "insn_c") + + # Create expected maps, align, compare + + isl_sched_map_before_expected = isl.Map( + "[pi, pk] -> { " + "[statement = 0, i, k] -> [l0 = 0, l1 = i, l2 = 0, l3 = k, l4 = 0] : " + "0 <= i < pi and 0 <= k < pk }" + ) + 
isl_sched_map_before_expected = align_isl_maps_by_var_names( + isl_sched_map_before_expected, isl_sched_map_before) + + isl_sched_map_after_expected = isl.Map( + "[pi, pj] -> { " + "[statement = 1, i, j] -> [l0 = 0, l1 = i, l2 = 1, l3 = j, l4 = 0] : " + "0 <= i < pi and 0 <= j < pj }" + ) + isl_sched_map_after_expected = align_isl_maps_by_var_names( + isl_sched_map_after_expected, isl_sched_map_after) + + assert isl_sched_map_before == isl_sched_map_before_expected + assert isl_sched_map_after == isl_sched_map_after_expected + + # ------------------------------------------------------------------------------ + # Relationship between insn_a and insn_d --------------------------------------- + + assert lex_sched_AD.stmt_instance_before.lex_pt == [0, 'i', 0, 'k', 0] + assert lex_sched_AD.stmt_instance_after.lex_pt == [1, 't', 0, 0, 0] + + # Get two isl maps representing the LexSchedule + + isl_sched_map_before, isl_sched_map_after = \ + get_isl_maps_for_LexSchedule(lex_sched_AD, knl, "insn_a", "insn_d") + + # Create expected maps, align, compare + + isl_sched_map_before_expected = isl.Map( + "[pi, pk] -> { " + "[statement = 0, i, k] -> [l0 = 0, l1 = i, l2 = 0, l3 = k, l4 = 0] : " + "0 <= i < pi and 0 <= k < pk }" + ) + isl_sched_map_before_expected = align_isl_maps_by_var_names( + isl_sched_map_before_expected, isl_sched_map_before) + + isl_sched_map_after_expected = isl.Map( + "[pt] -> { " + "[statement = 1, t] -> [l0 = 1, l1 = t, l2 = 0, l3 = 0, l4 = 0] : " + "0 <= t < pt }" + ) + isl_sched_map_after_expected = align_isl_maps_by_var_names( + isl_sched_map_after_expected, isl_sched_map_after) + + assert isl_sched_map_before == isl_sched_map_before_expected + assert isl_sched_map_after == isl_sched_map_after_expected + + # ------------------------------------------------------------------------------ + # Relationship between insn_b and insn_c --------------------------------------- + + # insn_b and insn_c could have been linearized in either order + if 
lex_sched_BC.stmt_instance_before.stmt.int_id == 0: + # insn_c comes first + assert lex_sched_BC.stmt_instance_before.lex_pt == [0, 'i', 0, 'j', 0] + assert lex_sched_BC.stmt_instance_after.lex_pt == [0, 'i', 0, 'j', 1] + + # Get two isl maps representing the LexSchedule + + isl_sched_map_before, isl_sched_map_after = \ + get_isl_maps_for_LexSchedule(lex_sched_BC, knl, "insn_b", "insn_c") + + # Create expected maps, align, compare + + isl_sched_map_before_expected = isl.Map( + "[pi, pj] -> { " + "[statement = 0, i, j] -> [l0 = 0, l1 = i, l2 = 0, l3 = j, l4 = 1] : " + "0 <= i < pi and 0 <= j < pj }" + ) + isl_sched_map_before_expected = align_isl_maps_by_var_names( + isl_sched_map_before_expected, isl_sched_map_before) + + isl_sched_map_after_expected = isl.Map( + "[pi, pj] -> { " + "[statement = 1, i, j] -> [l0 = 0, l1 = i, l2 = 0, l3 = j, l4 = 0] : " + "0 <= i < pi and 0 <= j < pj }" + ) + isl_sched_map_after_expected = align_isl_maps_by_var_names( + isl_sched_map_after_expected, isl_sched_map_after) + + assert isl_sched_map_before == isl_sched_map_before_expected + assert isl_sched_map_after == isl_sched_map_after_expected + elif lex_sched_BC.stmt_instance_before.stmt.int_id == 1: + # insn_c comes first + assert lex_sched_BC.stmt_instance_before.lex_pt == [0, 'i', 0, 'j', 1] + assert lex_sched_BC.stmt_instance_after.lex_pt == [0, 'i', 0, 'j', 0] + + # Get two isl maps representing the LexSchedule + + isl_sched_map_before, isl_sched_map_after = \ + get_isl_maps_for_LexSchedule(lex_sched_BC, knl, "insn_b", "insn_c") + + # Create expected maps, align, compare + + isl_sched_map_before_expected = isl.Map( + "[pi, pj] -> { " + "[statement = 1, i, j] -> [l0 = 0, l1 = i, l2 = 0, l3 = j, l4 = 1] : " + "0 <= i < pi and 0 <= j < pj }" + ) + isl_sched_map_before_expected = align_isl_maps_by_var_names( + isl_sched_map_before_expected, isl_sched_map_before) + + isl_sched_map_after_expected = isl.Map( + "[pi, pj] -> { " + "[statement = 0, i, j] -> [l0 = 0, l1 = i, l2 = 0, l3 = 
j, l4 = 0] : " + "0 <= i < pi and 0 <= j < pj }" + ) + isl_sched_map_after_expected = align_isl_maps_by_var_names( + isl_sched_map_after_expected, isl_sched_map_after) + + assert isl_sched_map_before == isl_sched_map_before_expected + assert isl_sched_map_after == isl_sched_map_after_expected + + # ------------------------------------------------------------------------------ + # Relationship between insn_b and insn_d --------------------------------------- + + assert lex_sched_BD.stmt_instance_before.lex_pt == [0, 'i', 0, 'j', 0] + assert lex_sched_BD.stmt_instance_after.lex_pt == [1, 't', 0, 0, 0] + + # Get two isl maps representing the LexSchedule + + isl_sched_map_before, isl_sched_map_after = \ + get_isl_maps_for_LexSchedule(lex_sched_BD, knl, "insn_b", "insn_d") + + # Create expected maps, align, compare + + isl_sched_map_before_expected = isl.Map( + "[pi, pj] -> { " + "[statement = 0, i, j] -> [l0 = 0, l1 = i, l2 = 0, l3 = j, l4 = 0] : " + "0 <= i < pi and 0 <= j < pj }" + ) + isl_sched_map_before_expected = align_isl_maps_by_var_names( + isl_sched_map_before_expected, isl_sched_map_before) + + isl_sched_map_after_expected = isl.Map( + "[pt] -> { " + "[statement = 1, t] -> [l0 = 1, l1 = t, l2 = 0, l3 = 0, l4 = 0] : " + "0 <= t < pt }" + ) + isl_sched_map_after_expected = align_isl_maps_by_var_names( + isl_sched_map_after_expected, isl_sched_map_after) + + assert isl_sched_map_before == isl_sched_map_before_expected + assert isl_sched_map_after == isl_sched_map_after_expected + + # ------------------------------------------------------------------------------ + # Relationship between insn_c and insn_d --------------------------------------- + + assert lex_sched_CD.stmt_instance_before.lex_pt == [0, 'i', 0, 'j', 0] + assert lex_sched_CD.stmt_instance_after.lex_pt == [1, 't', 0, 0, 0] + + # Get two isl maps representing the LexSchedule + + isl_sched_map_before, isl_sched_map_after = \ + get_isl_maps_for_LexSchedule(lex_sched_CD, knl, "insn_c", "insn_d") + + # 
Create expected maps, align, compare + + isl_sched_map_before_expected = isl.Map( + "[pi, pj] -> { " + "[statement = 0, i, j] -> [l0 = 0, l1 = i, l2 = 0, l3 = j, l4 = 0] : " + "0 <= i < pi and 0 <= j < pj }" + ) + isl_sched_map_before_expected = align_isl_maps_by_var_names( + isl_sched_map_before_expected, isl_sched_map_before) + + isl_sched_map_after_expected = isl.Map( + "[pt] -> { " + "[statement = 1, t] -> [l0 = 1, l1 = t, l2 = 0, l3 = 0, l4 = 0] : " + "0 <= t < pt }" + ) + isl_sched_map_after_expected = align_isl_maps_by_var_names( + isl_sched_map_after_expected, isl_sched_map_after) + + assert isl_sched_map_before == isl_sched_map_before_expected + assert isl_sched_map_after == isl_sched_map_after_expected + + +if __name__ == "__main__": + if len(sys.argv) > 1: + exec(sys.argv[1]) + else: + from pytest import main + main([__file__]) + +# vim: foldmethod=marker From 17e14b19e0c59c4854e60b0509ba665b870e468e Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Mon, 13 Apr 2020 23:01:23 -0500 Subject: [PATCH 011/460] add docstrings for get_schedule_for_statement_pair() and get_isl_maps_for_LexSchedule() --- loopy/schedule/checker/__init__.py | 86 ++++++++++++++++++++++++------ 1 file changed, 70 insertions(+), 16 deletions(-) diff --git a/loopy/schedule/checker/__init__.py b/loopy/schedule/checker/__init__.py index 1acee56b9..99a555e00 100644 --- a/loopy/schedule/checker/__init__.py +++ b/loopy/schedule/checker/__init__.py @@ -20,6 +20,7 @@ THE SOFTWARE. """ +# {{{ Create LexSchedule for statement pair def get_schedule_for_statement_pair( knl, @@ -28,32 +29,52 @@ def get_schedule_for_statement_pair( insn_id_after, prohibited_var_names=set(), ): - """A set of dependencies between two statements. - - .. 
arg insn_id_before: An instruction identifier that is unique within + """Create a :class:`loopy.schedule.checker.schedule.LexSchedule` + representing the order of two statements as a mapping from + :class:`loopy.schedule.checker.LexScheduleStatementInstance` + to lexicographic time. + + :arg knl: A :class:`loopy.kernel.LoopKernel` containing the + linearization items that will be used to create a schedule. + + :arg linearization_items: A list of :class:`loopy.schedule.ScheduleItem` + (to be renamed to `loopy.schedule.LinearizationItem`) containing + the two linearization items for which a schedule will be + created. This list may be a partial linearization for a + kernel since this function may be used during the linearization + process. + + :arg insn_id_before: An instruction identifier that is unique within a :class:`loopy.kernel.LoopKernel`. - .. arg insn_id_after: An instruction identifier that is unique within + :arg insn_id_after: An instruction identifier that is unique within a :class:`loopy.kernel.LoopKernel`. - """ + :arg prohibited_var_names: A set of :class:`str` representing + variable names that should not be used when creating names for + dimensions in a :class:`loopy.schedule.checker.LexSchedule`. - # We don't retrieve linearization items from knl because knl may not be - # (fully) linearized yet. This function may be called part way through the - # linearization process and receive the current (unfinished) set of - # linearization items + :returns: A :class:`loopy.schedule.checker.schedule.LexSchedule` + representing the order of two statements as a mapping from + :class:`loopy.schedule.checker.LexScheduleStatementInstance` + to lexicographic time. 
+ """ - # Preprocess if not already preprocessed + # {{{ Preprocess if not already preprocessed from loopy import preprocess_kernel preproc_knl = preprocess_kernel(knl) + # }}} + # {{{ By default, don't create LexSchedule variables matching existing inames if not prohibited_var_names: prohibited_var_names = preproc_knl.all_inames() + # }}} - # Get EnterLoop inames tagged as concurrent so LexSchedule can ignore + # {{{ Find any EnterLoop inames that are tagged as concurrent + # so that LexSchedule knows to ignore them # (In the future, this shouldn't be necessary because there # won't be any inames with ConcurrentTags in EnterLoop linearization items. - # Test exercising this: test_linearization_checker_with_stroud_bernstein()) + # Test which exercises this: test_linearization_checker_with_stroud_bernstein()) from loopy.schedule.checker.utils import ( get_concurrent_inames, _get_EnterLoop_inames, @@ -67,8 +88,9 @@ def get_schedule_for_statement_pair( "get_schedule_for_statement_pair encountered EnterLoop for inames %s " "with ConcurrentTag(s) in linearization for kernel %s. " "Ignoring these loops." 
% (conc_loop_inames, preproc_knl.name)) + # }}} - # Create LexSchedule: mapping of {statement instance: lex point} + # {{{ Create LexSchedule: mapping of {statement instance: lex point} # include only instructions involved in this dependency from loopy.schedule.checker.schedule import LexSchedule return LexSchedule( @@ -78,27 +100,59 @@ def get_schedule_for_statement_pair( prohibited_var_names=prohibited_var_names, loops_to_ignore=conc_loop_inames, ) + # }}} + +# }}} +# {{{ Get isl map pair for LexSchedule + def get_isl_maps_for_LexSchedule( lex_sched, knl, insn_id_before, insn_id_after, ): - # Get two isl maps representing the LexSchedule, - # one for the 'before' linearization item and one for 'after'; - # this requires the iname domains + """Create a pair of :class:`islpy.Map`s representing a + :class:`loopy.schedule.checker.LexSchedule` as two mappings + from statement instances to lexicographic time, one for + the dependee statement and one for the depender. + + :arg lex_sched: A :class:`loopy.schedule.checker.schedule.LexSchedule` + representing the order of two statements as a mapping from + :class:`loopy.schedule.checker.LexScheduleStatementInstance` + to lexicographic time. + + :arg knl: A :class:`loopy.kernel.LoopKernel` containing the + linearization items that will be used to create a schedule. + :arg insn_id_before: An instruction identifier that is unique within + a :class:`loopy.kernel.LoopKernel`. + + :arg insn_id_after: An instruction identifier that is unique within + a :class:`loopy.kernel.LoopKernel`. + + :returns: A two-tuple containing two :class:`islpy.Map`s + representing the schedule as two mappings + from statement instances to lexicographic time, one for + the dependee and one for the depender. 
+ """ + + # {{{ Get iname domains insn_before_inames = knl.id_to_insn[insn_id_before].within_inames insn_after_inames = knl.id_to_insn[insn_id_after].within_inames dom_before = knl.get_inames_domain(insn_before_inames) dom_after = knl.get_inames_domain(insn_after_inames) + # }}} + # {{{ Get isl maps isl_sched_map_before, isl_sched_map_after = \ lex_sched.create_isl_maps( dom_before, dom_after, ) + # }}} return isl_sched_map_before, isl_sched_map_after + +# }}} From dc45709e6e9b117940f375c5fba8aeeb53f0d3b6 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Mon, 13 Apr 2020 23:03:14 -0500 Subject: [PATCH 012/460] remove get_statement_ordering_map() (won't be part of this MR) --- .../checker/lexicographic_order_map.py | 38 ------------------- 1 file changed, 38 deletions(-) diff --git a/loopy/schedule/checker/lexicographic_order_map.py b/loopy/schedule/checker/lexicographic_order_map.py index 2e063e7d7..399add0b3 100644 --- a/loopy/schedule/checker/lexicographic_order_map.py +++ b/loopy/schedule/checker/lexicographic_order_map.py @@ -23,44 +23,6 @@ import islpy as isl -def get_statement_ordering_map( - sched_map_before, sched_map_after, lex_map, before_marker="'"): - """Return a mapping that maps each statement instance to - all statement instances occuring later. - - :arg sched_map_before: An :class:`islpy.Map` representing instruction - instance order for the dependee as a mapping from each statement - instance to a point in the lexicographic ordering. - - :arg sched_map_after: An :class:`islpy.Map` representing instruction - instance order for the depender as a mapping from each statement - instance to a point in the lexicographic ordering. - - :arg lex_map: An :class:`islpy.Map` representing a lexicographic - ordering as a mapping from each point in lexicographic time - to every point that occurs later in lexicographic time. E.g.:: - - {[i0', i1', i2', ...] -> [i0, i1, i2, ...] 
: - i0' < i0 or (i0' = i0 and i1' < i1) - or (i0' = i0 and i1' = i1 and i2' < i2) ...} - - :returns: An :class:`islpy.Map` representing the lex schedule as - a mapping from each statement instance to all statement instances - occuring later. I.e., we compose B -> L -> A^-1, where B - is sched_map_before, A is sched_map_after, and L is the - lexicographic ordering map. - - """ - - sio = sched_map_before.apply_range( - lex_map).apply_range(sched_map_after.reverse()) - # append marker to in names - for i in range(sio.dim(isl.dim_type.in_)): - sio = sio.set_dim_name(isl.dim_type.in_, i, sio.get_dim_name( - isl.dim_type.in_, i)+before_marker) - return sio - - def get_lex_order_constraint(islvars, before_names, after_names): """Return a constraint represented as an :class:`islpy.Set` defining a 'happens before' relationship in a lexicographic From ee59915fb3d0c163ffddafa82eede6497fb348cf Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Mon, 13 Apr 2020 23:07:34 -0500 Subject: [PATCH 013/460] remove lexicographic_order_map (not used in this MR) --- .../checker/lexicographic_order_map.py | 121 ------------------ 1 file changed, 121 deletions(-) delete mode 100644 loopy/schedule/checker/lexicographic_order_map.py diff --git a/loopy/schedule/checker/lexicographic_order_map.py b/loopy/schedule/checker/lexicographic_order_map.py deleted file mode 100644 index 399add0b3..000000000 --- a/loopy/schedule/checker/lexicographic_order_map.py +++ /dev/null @@ -1,121 +0,0 @@ -__copyright__ = "Copyright (C) 2019 James Stevens" - -__license__ = """ -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The 
above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -""" - -import islpy as isl - - -def get_lex_order_constraint(islvars, before_names, after_names): - """Return a constraint represented as an :class:`islpy.Set` - defining a 'happens before' relationship in a lexicographic - ordering. - - :arg islvars: A dictionary from variable names to :class:`islpy.PwAff` - instances that represent each of the variables - (islvars may be produced by `islpy.make_zero_and_vars`). The key - '0' is also include and represents a :class:`islpy.PwAff` zero constant. - This dictionary defines the space to be used for the set. - - :arg before_names: A list of :class:`str` variable names representing - the lexicographic space dimensions for a point in lexicographic - time that occurs before. (see example below) - - :arg after_names: A list of :class:`str` variable names representing - the lexicographic space dimensions for a point in lexicographic - time that occurs after. (see example below) - - :returns: An :class:`islpy.Set` representing a constraint that enforces a - lexicographic ordering. 
E.g., if ``before_names = [i0', i1', i2']`` and - ``after_names = [i0, i1, i2]``, return the set:: - - {[i0', i1', i2', i0, i1, i2] : - i0' < i0 or (i0' = i0 and i1' < i1) - or (i0' = i0 and i1' = i1 and i2' < i2)} - - """ - - lex_order_constraint = islvars[before_names[0]].lt_set(islvars[after_names[0]]) - for i in range(1, len(before_names)): - lex_order_constraint_conj = islvars[before_names[i]].lt_set( - islvars[after_names[i]]) - for j in range(i): - lex_order_constraint_conj = lex_order_constraint_conj & \ - islvars[before_names[j]].eq_set(islvars[after_names[j]]) - lex_order_constraint = lex_order_constraint | lex_order_constraint_conj - return lex_order_constraint - - -def create_lex_order_map( - n_dims, - before_names=None, - after_names=None, - ): - """Return a mapping that maps each point in a lexicographic - ordering to every point that occurs later in lexicographic - time. - - :arg n_dims: An :class:`int` representing the number of dimensions - in the lexicographic ordering. - - :arg before_names: A list of :class:`str` variable names representing - the lexicographic space dimensions for a point in lexicographic - time that occurs before. (see example below) - - :arg after_names: A list of :class:`str` variable names representing - the lexicographic space dimensions for a point in lexicographic - time that occurs after. (see example below) - - :returns: An :class:`islpy.Map` representing a lexicographic - ordering as a mapping from each point in lexicographic time - to every point that occurs later in lexicographic time. 
- E.g., if ``before_names = [i0', i1', i2']`` and - ``after_names = [i0, i1, i2]``, return the map:: - - {[i0', i1', i2'] -> [i0, i1, i2] : - i0' < i0 or (i0' = i0 and i1' < i1) - or (i0' = i0 and i1' = i1 and i2' < i2)} - - """ - - if before_names is None: - before_names = ["i%s" % (i) for i in range(n_dims)] - if after_names is None: - from loopy.schedule.checker.utils import ( - append_marker_to_strings, - ) - after_names = append_marker_to_strings(before_names, marker="_") - - assert len(before_names) == len(after_names) == n_dims - dim_type = isl.dim_type - - islvars = isl.make_zero_and_vars( - before_names+after_names, - []) - - lex_order_constraint = get_lex_order_constraint( - islvars, before_names, after_names) - - lex_map = isl.Map.from_domain(lex_order_constraint) - lex_map = lex_map.move_dims( - dim_type.out, 0, dim_type.in_, - len(before_names), len(after_names)) - - return lex_map From 5401383198da8c7ba6fc840d075a23297379e76a Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Mon, 13 Apr 2020 23:09:52 -0500 Subject: [PATCH 014/460] remove get_lex_order_map_for_sched_space() (not part of this MR) --- loopy/schedule/checker/schedule.py | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/loopy/schedule/checker/schedule.py b/loopy/schedule/checker/schedule.py index c395863ef..199a5deda 100644 --- a/loopy/schedule/checker/schedule.py +++ b/loopy/schedule/checker/schedule.py @@ -458,19 +458,6 @@ def get_lex_var_names(self): return [self.lex_var_prefix+str(i) for i in range(self.max_lex_dims())] - def get_lex_order_map_for_sched_space(self): - """Return an :class:`islpy.BasicMap` that maps each point in a - lexicographic ordering to every point that is - lexocigraphically greater. 
- """ - - from loopy.schedule.checker.lexicographic_order_map import ( - create_lex_order_map, - ) - n_dims = self.max_lex_dims() - return create_lex_order_map( - n_dims, before_names=self.get_lex_var_names()) - def __eq__(self, other): return ( self.stmt_instance_before == other.stmt_instance_before From 7e94f4beafa84aa525a7eb5c6177c7f12b498d19 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Mon, 13 Apr 2020 23:14:12 -0500 Subject: [PATCH 015/460] remove more methods from LexSchedule that are not used in this MR (loopy_insn_id_to_lex_sched_id and __eq__) --- loopy/schedule/checker/schedule.py | 17 ----------------- 1 file changed, 17 deletions(-) diff --git a/loopy/schedule/checker/schedule.py b/loopy/schedule/checker/schedule.py index 199a5deda..ed168ae5b 100644 --- a/loopy/schedule/checker/schedule.py +++ b/loopy/schedule/checker/schedule.py @@ -306,18 +306,6 @@ def __init__( # the values in these missing dims should be zero, so add them self.pad_lex_pts_with_zeros() - def loopy_insn_id_to_lex_sched_id(self): - """Return a dictionary mapping insn_id to int_id, where ``insn_id`` and - ``int_id`` refer to the ``insn_id`` and ``int_id`` attributes of - :class:`LexScheduleStatement`. 
- """ - return { - self.stmt_instance_before.stmt.insn_id: - self.stmt_instance_before.stmt.int_id, - self.stmt_instance_after.stmt.insn_id: - self.stmt_instance_after.stmt.int_id, - } - def max_lex_dims(self): return max([ len(self.stmt_instance_before.lex_pt), @@ -458,11 +446,6 @@ def get_lex_var_names(self): return [self.lex_var_prefix+str(i) for i in range(self.max_lex_dims())] - def __eq__(self, other): - return ( - self.stmt_instance_before == other.stmt_instance_before - and self.stmt_instance_after == other.stmt_instance_after) - def __str__(self): sched_str = "Before: {\n" domain_elem = "[%s=%s,]" % ( From 16afc55bec1d072783e7b2a0e5eae6963e7506fe Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Mon, 13 Apr 2020 23:21:08 -0500 Subject: [PATCH 016/460] remove another func from utils that is not used in this MR (append_marker_to_strings) --- loopy/schedule/checker/utils.py | 7 ------- 1 file changed, 7 deletions(-) diff --git a/loopy/schedule/checker/utils.py b/loopy/schedule/checker/utils.py index 8757406b7..cb933de6f 100644 --- a/loopy/schedule/checker/utils.py +++ b/loopy/schedule/checker/utils.py @@ -143,13 +143,6 @@ def align_isl_maps_by_var_names(input_map, target_map): return aligned_input_map -def append_marker_to_strings(strings, marker="'"): - if not isinstance(strings, list): - raise ValueError("append_marker_to_strings did not receive a list") - else: - return [s+marker for s in strings] - - def _union_of_isl_sets_or_maps(set_list): union = set_list[0] for s in set_list[1:]: From d2f94b7cf6cee1d0f0a24b72209cff6be074852c Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Mon, 13 Apr 2020 23:41:53 -0500 Subject: [PATCH 017/460] remove LexScheduleStatement methods not needed in this MR --- loopy/schedule/checker/schedule.py | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/loopy/schedule/checker/schedule.py b/loopy/schedule/checker/schedule.py index ed168ae5b..542c48f95 100644 --- a/loopy/schedule/checker/schedule.py +++ 
b/loopy/schedule/checker/schedule.py @@ -51,16 +51,6 @@ def __init__( self.int_id = int_id self.within_inames = within_inames - def __eq__(self, other): - return ( - self.insn_id == other.insn_id - and self.int_id == other.int_id - and self.within_inames == other.within_inames - ) - - def __hash__(self): - return hash(repr(self)) - def update_persistent_hash(self, key_hash, key_builder): """Custom hash computation function for use with :class:`pytools.persistent_dict.PersistentDict`. From aa44009f367adb810ee3686047d2fbbec13135e5 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Mon, 13 Apr 2020 23:47:49 -0500 Subject: [PATCH 018/460] fixing flake8 issues --- loopy/schedule/checker/__init__.py | 1 + loopy/schedule/checker/schedule.py | 20 +++++----- test/test_linearization_checker.py | 59 +++++++++++++++--------------- 3 files changed, 40 insertions(+), 40 deletions(-) diff --git a/loopy/schedule/checker/__init__.py b/loopy/schedule/checker/__init__.py index 99a555e00..2911351b2 100644 --- a/loopy/schedule/checker/__init__.py +++ b/loopy/schedule/checker/__init__.py @@ -20,6 +20,7 @@ THE SOFTWARE. 
""" + # {{{ Create LexSchedule for statement pair def get_schedule_for_statement_pair( diff --git a/loopy/schedule/checker/schedule.py b/loopy/schedule/checker/schedule.py index 542c48f95..39c8c1161 100644 --- a/loopy/schedule/checker/schedule.py +++ b/loopy/schedule/checker/schedule.py @@ -177,9 +177,6 @@ def __init__( next_insn_lex_pt = [0] stmt_since_last_block_at_tier = [False] next_sid = 0 - stmt_added_since_last_EnterLoop = False - stmt_added_since_last_LeaveLoop = False - #stmt_added_since_last_new_block = False # blocks start at open/close loop for linearization_item in linearization_items_ordered: if isinstance(linearization_item, EnterLoop): iname = linearization_item.iname @@ -188,8 +185,8 @@ def __init__( # We could always increment next_insn_lex_pt[-1] here since this new # section of code comes after the previous section (statements - # since last opened/closed loop), but if we have not added any statements - # within this block yet, we don't have to + # since last opened/closed loop), but if we have not added any + # statements within this block yet, we don't have to # (effectively ignoring that section of code). if stmt_since_last_block_at_tier[-1]: next_insn_lex_pt[-1] = next_insn_lex_pt[-1]+1 @@ -214,8 +211,8 @@ def __init__( # We could always increment next_insn_lex_pt[-1] here since this new # block of code comes after the previous block (all statements - # since last opened/closed loop), but if we have not added any statements - # within this block yet, we don't have to + # since last opened/closed loop), but if we have not added any + # statements within this block yet, we don't have to # (effectively ignoring that section of code). 
stmt_since_last_block_at_tier.pop() if stmt_since_last_block_at_tier[-1]: @@ -255,7 +252,8 @@ def __init__( next_sid += 1 # all current (nested) blocks now contain a statement - stmt_since_last_block_at_tier = [True]*len(stmt_since_last_block_at_tier) + stmt_since_last_block_at_tier = [True]*len( + stmt_since_last_block_at_tier) elif lp_insn_id == before_insn_id: # add before sched item self.stmt_instance_before = LexScheduleStatementInstance( @@ -270,7 +268,8 @@ def __init__( next_sid += 1 # all current (nested) blocks now contain a statement - stmt_since_last_block_at_tier = [True]*len(stmt_since_last_block_at_tier) + stmt_since_last_block_at_tier = [True]*len( + stmt_since_last_block_at_tier) elif lp_insn_id == after_insn_id: # add after sched item self.stmt_instance_after = LexScheduleStatementInstance( @@ -285,7 +284,8 @@ def __init__( next_sid += 1 # all current (nested) blocks now contain a statement - stmt_since_last_block_at_tier = [True]*len(stmt_since_last_block_at_tier) + stmt_since_last_block_at_tier = [True]*len( + stmt_since_last_block_at_tier) else: pass # to save time, stop when we've created both statements diff --git a/test/test_linearization_checker.py b/test/test_linearization_checker.py index 7e145ce1d..68688f0df 100644 --- a/test/test_linearization_checker.py +++ b/test/test_linearization_checker.py @@ -31,7 +31,6 @@ as pytest_generate_tests) from loopy.version import LOOPY_USE_LANGUAGE_VERSION_2018_2 # noqa import logging -from loopy.kernel import KernelState from loopy import ( preprocess_kernel, get_one_linearized_kernel, @@ -95,37 +94,37 @@ def test_lexschedule_and_islmap_creation(): linearization_items = knl.linearization # Create LexSchedule: mapping of {statement instance: lex point} - lex_sched_AB = get_schedule_for_statement_pair( + lex_sched_ab = get_schedule_for_statement_pair( knl, linearization_items, "insn_a", "insn_b", ) - lex_sched_AC = get_schedule_for_statement_pair( + lex_sched_ac = get_schedule_for_statement_pair( knl, 
linearization_items, "insn_a", "insn_c", ) - lex_sched_AD = get_schedule_for_statement_pair( + lex_sched_ad = get_schedule_for_statement_pair( knl, linearization_items, "insn_a", "insn_d", ) - lex_sched_BC = get_schedule_for_statement_pair( + lex_sched_bc = get_schedule_for_statement_pair( knl, linearization_items, "insn_b", "insn_c", ) - lex_sched_BD = get_schedule_for_statement_pair( + lex_sched_bd = get_schedule_for_statement_pair( knl, linearization_items, "insn_b", "insn_d", ) - lex_sched_CD = get_schedule_for_statement_pair( + lex_sched_cd = get_schedule_for_statement_pair( knl, linearization_items, "insn_c", @@ -134,13 +133,13 @@ def test_lexschedule_and_islmap_creation(): # Relationship between insn_a and insn_b --------------------------------------- - assert lex_sched_AB.stmt_instance_before.lex_pt == [0, 'i', 0, 'k', 0] - assert lex_sched_AB.stmt_instance_after.lex_pt == [0, 'i', 1, 'j', 0] + assert lex_sched_ab.stmt_instance_before.lex_pt == [0, 'i', 0, 'k', 0] + assert lex_sched_ab.stmt_instance_after.lex_pt == [0, 'i', 1, 'j', 0] # Get two isl maps representing the LexSchedule isl_sched_map_before, isl_sched_map_after = \ - get_isl_maps_for_LexSchedule(lex_sched_AB, knl, "insn_a", "insn_b") + get_isl_maps_for_LexSchedule(lex_sched_ab, knl, "insn_a", "insn_b") # Create expected maps, align, compare @@ -166,13 +165,13 @@ def test_lexschedule_and_islmap_creation(): # ------------------------------------------------------------------------------ # Relationship between insn_a and insn_c --------------------------------------- - assert lex_sched_AC.stmt_instance_before.lex_pt == [0, 'i', 0, 'k', 0] - assert lex_sched_AC.stmt_instance_after.lex_pt == [0, 'i', 1, 'j', 0] + assert lex_sched_ac.stmt_instance_before.lex_pt == [0, 'i', 0, 'k', 0] + assert lex_sched_ac.stmt_instance_after.lex_pt == [0, 'i', 1, 'j', 0] # Get two isl maps representing the LexSchedule isl_sched_map_before, isl_sched_map_after = \ - get_isl_maps_for_LexSchedule(lex_sched_AC, knl, 
"insn_a", "insn_c") + get_isl_maps_for_LexSchedule(lex_sched_ac, knl, "insn_a", "insn_c") # Create expected maps, align, compare @@ -198,13 +197,13 @@ def test_lexschedule_and_islmap_creation(): # ------------------------------------------------------------------------------ # Relationship between insn_a and insn_d --------------------------------------- - assert lex_sched_AD.stmt_instance_before.lex_pt == [0, 'i', 0, 'k', 0] - assert lex_sched_AD.stmt_instance_after.lex_pt == [1, 't', 0, 0, 0] + assert lex_sched_ad.stmt_instance_before.lex_pt == [0, 'i', 0, 'k', 0] + assert lex_sched_ad.stmt_instance_after.lex_pt == [1, 't', 0, 0, 0] # Get two isl maps representing the LexSchedule isl_sched_map_before, isl_sched_map_after = \ - get_isl_maps_for_LexSchedule(lex_sched_AD, knl, "insn_a", "insn_d") + get_isl_maps_for_LexSchedule(lex_sched_ad, knl, "insn_a", "insn_d") # Create expected maps, align, compare @@ -231,15 +230,15 @@ def test_lexschedule_and_islmap_creation(): # Relationship between insn_b and insn_c --------------------------------------- # insn_b and insn_c could have been linearized in either order - if lex_sched_BC.stmt_instance_before.stmt.int_id == 0: + if lex_sched_bc.stmt_instance_before.stmt.int_id == 0: # insn_c comes first - assert lex_sched_BC.stmt_instance_before.lex_pt == [0, 'i', 0, 'j', 0] - assert lex_sched_BC.stmt_instance_after.lex_pt == [0, 'i', 0, 'j', 1] + assert lex_sched_bc.stmt_instance_before.lex_pt == [0, 'i', 0, 'j', 0] + assert lex_sched_bc.stmt_instance_after.lex_pt == [0, 'i', 0, 'j', 1] # Get two isl maps representing the LexSchedule isl_sched_map_before, isl_sched_map_after = \ - get_isl_maps_for_LexSchedule(lex_sched_BC, knl, "insn_b", "insn_c") + get_isl_maps_for_LexSchedule(lex_sched_bc, knl, "insn_b", "insn_c") # Create expected maps, align, compare @@ -261,15 +260,15 @@ def test_lexschedule_and_islmap_creation(): assert isl_sched_map_before == isl_sched_map_before_expected assert isl_sched_map_after == 
isl_sched_map_after_expected - elif lex_sched_BC.stmt_instance_before.stmt.int_id == 1: + elif lex_sched_bc.stmt_instance_before.stmt.int_id == 1: # insn_c comes first - assert lex_sched_BC.stmt_instance_before.lex_pt == [0, 'i', 0, 'j', 1] - assert lex_sched_BC.stmt_instance_after.lex_pt == [0, 'i', 0, 'j', 0] + assert lex_sched_bc.stmt_instance_before.lex_pt == [0, 'i', 0, 'j', 1] + assert lex_sched_bc.stmt_instance_after.lex_pt == [0, 'i', 0, 'j', 0] # Get two isl maps representing the LexSchedule isl_sched_map_before, isl_sched_map_after = \ - get_isl_maps_for_LexSchedule(lex_sched_BC, knl, "insn_b", "insn_c") + get_isl_maps_for_LexSchedule(lex_sched_bc, knl, "insn_b", "insn_c") # Create expected maps, align, compare @@ -295,13 +294,13 @@ def test_lexschedule_and_islmap_creation(): # ------------------------------------------------------------------------------ # Relationship between insn_b and insn_d --------------------------------------- - assert lex_sched_BD.stmt_instance_before.lex_pt == [0, 'i', 0, 'j', 0] - assert lex_sched_BD.stmt_instance_after.lex_pt == [1, 't', 0, 0, 0] + assert lex_sched_bd.stmt_instance_before.lex_pt == [0, 'i', 0, 'j', 0] + assert lex_sched_bd.stmt_instance_after.lex_pt == [1, 't', 0, 0, 0] # Get two isl maps representing the LexSchedule isl_sched_map_before, isl_sched_map_after = \ - get_isl_maps_for_LexSchedule(lex_sched_BD, knl, "insn_b", "insn_d") + get_isl_maps_for_LexSchedule(lex_sched_bd, knl, "insn_b", "insn_d") # Create expected maps, align, compare @@ -327,13 +326,13 @@ def test_lexschedule_and_islmap_creation(): # ------------------------------------------------------------------------------ # Relationship between insn_c and insn_d --------------------------------------- - assert lex_sched_CD.stmt_instance_before.lex_pt == [0, 'i', 0, 'j', 0] - assert lex_sched_CD.stmt_instance_after.lex_pt == [1, 't', 0, 0, 0] + assert lex_sched_cd.stmt_instance_before.lex_pt == [0, 'i', 0, 'j', 0] + assert 
lex_sched_cd.stmt_instance_after.lex_pt == [1, 't', 0, 0, 0] # Get two isl maps representing the LexSchedule isl_sched_map_before, isl_sched_map_after = \ - get_isl_maps_for_LexSchedule(lex_sched_CD, knl, "insn_c", "insn_d") + get_isl_maps_for_LexSchedule(lex_sched_cd, knl, "insn_c", "insn_d") # Create expected maps, align, compare From 08e7342a919856a1f2eee7d1836c65fcdca61655 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Tue, 14 Apr 2020 00:29:02 -0500 Subject: [PATCH 019/460] adding LexScheduleStatement.__eq__ back in to see if it fixes caching error --- loopy/schedule/checker/schedule.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/loopy/schedule/checker/schedule.py b/loopy/schedule/checker/schedule.py index 39c8c1161..b5f9b4256 100644 --- a/loopy/schedule/checker/schedule.py +++ b/loopy/schedule/checker/schedule.py @@ -51,6 +51,13 @@ def __init__( self.int_id = int_id self.within_inames = within_inames + def __eq__(self, other): + return ( + self.insn_id == other.insn_id + and self.int_id == other.int_id + and self.within_inames == other.within_inames + ) + def update_persistent_hash(self, key_hash, key_builder): """Custom hash computation function for use with :class:`pytools.persistent_dict.PersistentDict`. 
From 5f53d50fe0b38c288c1fd1a5a681d24a317f6e8d Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Fri, 17 Apr 2020 03:26:18 -0500 Subject: [PATCH 020/460] eliminate duplicate code in sched.__str__ --- loopy/schedule/checker/schedule.py | 24 ++++++++++-------------- 1 file changed, 10 insertions(+), 14 deletions(-) diff --git a/loopy/schedule/checker/schedule.py b/loopy/schedule/checker/schedule.py index b5f9b4256..af35bd68e 100644 --- a/loopy/schedule/checker/schedule.py +++ b/loopy/schedule/checker/schedule.py @@ -444,17 +444,13 @@ def get_lex_var_names(self): for i in range(self.max_lex_dims())] def __str__(self): - sched_str = "Before: {\n" - domain_elem = "[%s=%s,]" % ( - self.statement_var_name, - self.stmt_instance_before.stmt.int_id) - sched_str += "%s -> %s;\n" % (domain_elem, self.stmt_instance_before.lex_pt) - sched_str += "}\n" - - sched_str += "After: {\n" - domain_elem = "[%s=%s,]" % ( - self.statement_var_name, - self.stmt_instance_after.stmt.int_id) - sched_str += "%s -> %s;\n" % (domain_elem, self.stmt_instance_after.lex_pt) - sched_str += "}" - return sched_str + + def stringify_sched_stmt_instance(stmt_inst): + return "{\n[%s=%s,] -> %s;\n}" % ( + self.statement_var_name, + stmt_inst.stmt.int_id, + stmt_inst.lex_pt) + + return "Before: %s\nAfter: %s" % ( + stringify_sched_stmt_instance(self.stmt_instance_before), + stringify_sched_stmt_instance(self.stmt_instance_after)) From 5441bfa239a49567c63200ff24f1e0f2e45e4705 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Fri, 17 Apr 2020 04:27:41 -0500 Subject: [PATCH 021/460] eliminate duplicate code in sched.create_isl_maps --- loopy/schedule/checker/schedule.py | 110 ++++++++++++----------------- 1 file changed, 46 insertions(+), 64 deletions(-) diff --git a/loopy/schedule/checker/schedule.py b/loopy/schedule/checker/schedule.py index af35bd68e..8ca42e83d 100644 --- a/loopy/schedule/checker/schedule.py +++ b/loopy/schedule/checker/schedule.py @@ -365,79 +365,61 @@ def create_isl_maps( """ from 
loopy.schedule.checker.utils import ( + list_var_names_in_isl_sets, + get_isl_space, create_symbolic_isl_map_from_tuples, - add_dims_to_isl_set + add_dims_to_isl_set, ) - from loopy.schedule.checker.utils import ( - list_var_names_in_isl_sets, - ) - if dom_inames_ordered_before is None: - dom_inames_ordered_before = list_var_names_in_isl_sets( - [dom_before]) - if dom_inames_ordered_after is None: - dom_inames_ordered_after = list_var_names_in_isl_sets( - [dom_after]) - - # create an isl space - # {('statement', used in >=1 statement domain>) -> - # (lexicographic ordering dims)} - from loopy.schedule.checker.utils import ( - get_isl_space - ) params_sched = [] out_names_sched = self.get_lex_var_names() - in_names_sched_before = [ - self.statement_var_name] + dom_inames_ordered_before[:] - sched_space_before = get_isl_space( - params_sched, in_names_sched_before, out_names_sched) - in_names_sched_after = [ - self.statement_var_name] + dom_inames_ordered_after[:] - sched_space_after = get_isl_space( - params_sched, in_names_sched_after, out_names_sched) - - # Insert 'statement' dim into domain so that its space allows for - # intersection with sched map later - doms_to_intersect_before = [ - add_dims_to_isl_set( - dom_before, isl.dim_type.set, - [self.statement_var_name], 0), - ] - doms_to_intersect_after = [ - add_dims_to_isl_set( - dom_after, isl.dim_type.set, - [self.statement_var_name], 0), - ] + def _get_isl_map_for_stmt_inst( + stmt_inst, dom, dom_inames_ordered): - # Each isl map representing the schedule maps - # statement instances -> lex time + # create an isl space + # {('statement', used in statement domain>) -> + # (lexicographic ordering dims)} + if dom_inames_ordered is None: + dom_inames_ordered = list_var_names_in_isl_sets([dom]) - # Right now, statement tuples consist of single int. - # Add all inames from domains to map domain tuples. 
+ in_names_sched = [ + self.statement_var_name] + dom_inames_ordered[:] + sched_space = get_isl_space( + params_sched, in_names_sched, out_names_sched) - # create isl map - return ( - create_symbolic_isl_map_from_tuples( - zip( - [( - (self.stmt_instance_before.stmt.int_id,) - + tuple(dom_inames_ordered_before), - self.stmt_instance_before.lex_pt - )], - doms_to_intersect_before - ), - sched_space_before, self.statement_var_name), - create_symbolic_isl_map_from_tuples( - zip( - [( - (self.stmt_instance_after.stmt.int_id,) - + tuple(dom_inames_ordered_after), - self.stmt_instance_after.lex_pt)], - doms_to_intersect_after - ), - sched_space_after, self.statement_var_name) - ) + # Insert 'statement' dim into domain so that its space allows + # for intersection with sched map later + dom_to_intersect = [ + add_dims_to_isl_set( + dom, isl.dim_type.set, [self.statement_var_name], 0), ] + + # Each isl map representing the schedule will map + # statement instances -> lex time. + # Right now, statement instance tuples consist of single int. + # Add all inames from domains to each map domain tuple. 
+ tuple_pair = [( + (stmt_inst.stmt.int_id, ) + tuple(dom_inames_ordered), + stmt_inst.lex_pt + )] + + # create isl map + return create_symbolic_isl_map_from_tuples( + tuple_pairs_with_domains=zip(tuple_pair, dom_to_intersect), + space=sched_space, + statement_var_name=self.statement_var_name, + ) + + map_before = _get_isl_map_for_stmt_inst( + self.stmt_instance_before, + dom_before, + dom_inames_ordered_before) + map_after = _get_isl_map_for_stmt_inst( + self.stmt_instance_after, + dom_after, + dom_inames_ordered_after) + + return (map_before, map_after) def get_lex_var_names(self): return [self.lex_var_prefix+str(i) From 31c68b724c2c9d6bf1fdadc703b3e6a60c8a6cff Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Fri, 17 Apr 2020 04:34:33 -0500 Subject: [PATCH 022/460] eliminate duplicate code in sched.pad_lex_pts_with_zeros --- loopy/schedule/checker/schedule.py | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/loopy/schedule/checker/schedule.py b/loopy/schedule/checker/schedule.py index 8ca42e83d..4e59fdb7f 100644 --- a/loopy/schedule/checker/schedule.py +++ b/loopy/schedule/checker/schedule.py @@ -316,17 +316,18 @@ def pad_lex_pts_with_zeros(self): dimensions. 
""" + def _pad_lex_pt_with_zeros(stmt_inst, length): + return LexScheduleStatementInstance( + stmt_inst.stmt, + stmt_inst.lex_pt[:] + [0]*(length-len(stmt_inst.lex_pt)), + ) + max_lex_dim = self.max_lex_dims() - self.stmt_instance_before = LexScheduleStatementInstance( - self.stmt_instance_before.stmt, - self.stmt_instance_before.lex_pt[:] + [0]*( - max_lex_dim-len(self.stmt_instance_before.lex_pt)) - ) - self.stmt_instance_after = LexScheduleStatementInstance( - self.stmt_instance_after.stmt, - self.stmt_instance_after.lex_pt[:] + [0]*( - max_lex_dim-len(self.stmt_instance_after.lex_pt)) - ) + + self.stmt_instance_before = _pad_lex_pt_with_zeros( + self.stmt_instance_before, max_lex_dim) + self.stmt_instance_after = _pad_lex_pt_with_zeros( + self.stmt_instance_after, max_lex_dim) def create_isl_maps( self, From da919bfbe6cf9c5128d100caaf581f2b04c76ad0 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Fri, 17 Apr 2020 04:57:29 -0500 Subject: [PATCH 023/460] eliminate duplicate code in sched.__init__ --- loopy/schedule/checker/schedule.py | 65 ++++++++++-------------------- 1 file changed, 22 insertions(+), 43 deletions(-) diff --git a/loopy/schedule/checker/schedule.py b/loopy/schedule/checker/schedule.py index 4e59fdb7f..0aca588c3 100644 --- a/loopy/schedule/checker/schedule.py +++ b/loopy/schedule/checker/schedule.py @@ -182,7 +182,7 @@ def __init__( # keep track of the next point in our lexicographic ordering # initially this as a 1-d point with value 0 next_insn_lex_pt = [0] - stmt_since_last_block_at_tier = [False] + stmt_added_since_prev_block_at_tier = [False] next_sid = 0 for linearization_item in linearization_items_ordered: if isinstance(linearization_item, EnterLoop): @@ -193,19 +193,20 @@ def __init__( # We could always increment next_insn_lex_pt[-1] here since this new # section of code comes after the previous section (statements # since last opened/closed loop), but if we have not added any - # statements within this block yet, we don't have to + # 
statements within the previous section yet, we don't have to # (effectively ignoring that section of code). - if stmt_since_last_block_at_tier[-1]: + if stmt_added_since_prev_block_at_tier[-1]: next_insn_lex_pt[-1] = next_insn_lex_pt[-1]+1 - stmt_since_last_block_at_tier[-1] = False + stmt_added_since_prev_block_at_tier[-1] = False # upon entering a loop, we enter a new (deeper) tier, # add one lex dimension for the loop variable, # add second lex dim to enumerate code blocks within new loop, and - # append a dim to stmt_since_last_block_at_tier to represent new tier + # append a dim to stmt_added_since_prev_block_at_tier to represent + # new tier next_insn_lex_pt.append(iname) next_insn_lex_pt.append(0) - stmt_since_last_block_at_tier.append(False) + stmt_added_since_prev_block_at_tier.append(False) elif isinstance(linearization_item, LeaveLoop): if linearization_item.iname in loops_to_ignore: continue @@ -219,12 +220,12 @@ def __init__( # We could always increment next_insn_lex_pt[-1] here since this new # block of code comes after the previous block (all statements # since last opened/closed loop), but if we have not added any - # statements within this block yet, we don't have to + # statements within the previous section yet, we don't have to # (effectively ignoring that section of code). 
- stmt_since_last_block_at_tier.pop() - if stmt_since_last_block_at_tier[-1]: + stmt_added_since_prev_block_at_tier.pop() + if stmt_added_since_prev_block_at_tier[-1]: next_insn_lex_pt[-1] = next_insn_lex_pt[-1]+1 - stmt_since_last_block_at_tier[-1] = False + stmt_added_since_prev_block_at_tier[-1] = False elif isinstance(linearization_item, (RunInstruction, Barrier)): from loopy.schedule.checker.utils import ( _get_insn_id_from_linearization_item, @@ -238,30 +239,9 @@ def __init__( continue # only process before/after insns, otherwise ignore - if lp_insn_id == before_insn_id and lp_insn_id == after_insn_id: - # add before sched item - self.stmt_instance_before = LexScheduleStatementInstance( - LexScheduleStatement( - insn_id=lp_insn_id, - int_id=next_sid, # int representing insn - ), - next_insn_lex_pt[:]) - # add after sched item - self.stmt_instance_after = LexScheduleStatementInstance( - LexScheduleStatement( - insn_id=lp_insn_id, - int_id=next_sid, # int representing insn - ), - next_insn_lex_pt[:]) - - # increment lex dim val enumerating items in current code block - next_insn_lex_pt[-1] = next_insn_lex_pt[-1] + 1 - next_sid += 1 + stmt_added = False - # all current (nested) blocks now contain a statement - stmt_since_last_block_at_tier = [True]*len( - stmt_since_last_block_at_tier) - elif lp_insn_id == before_insn_id: + if lp_insn_id == before_insn_id: # add before sched item self.stmt_instance_before = LexScheduleStatementInstance( LexScheduleStatement( @@ -269,15 +249,9 @@ def __init__( int_id=next_sid, # int representing insn ), next_insn_lex_pt[:]) + stmt_added = True - # increment lex dim val enumerating items in current code block - next_insn_lex_pt[-1] = next_insn_lex_pt[-1] + 1 - next_sid += 1 - - # all current (nested) blocks now contain a statement - stmt_since_last_block_at_tier = [True]*len( - stmt_since_last_block_at_tier) - elif lp_insn_id == after_insn_id: + if lp_insn_id == after_insn_id: # add after sched item self.stmt_instance_after = 
LexScheduleStatementInstance( LexScheduleStatement( @@ -285,14 +259,19 @@ def __init__( int_id=next_sid, # int representing insn ), next_insn_lex_pt[:]) + stmt_added = True + + # Note: before/after may refer to same stmt, in which case + # both of the above conditionals execute + if stmt_added: # increment lex dim val enumerating items in current code block next_insn_lex_pt[-1] = next_insn_lex_pt[-1] + 1 next_sid += 1 # all current (nested) blocks now contain a statement - stmt_since_last_block_at_tier = [True]*len( - stmt_since_last_block_at_tier) + stmt_added_since_prev_block_at_tier = [True]*len( + stmt_added_since_prev_block_at_tier) else: pass # to save time, stop when we've created both statements From 7d291b0b49b878cc2c02767c717f6e652eb40cf2 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Sun, 19 Apr 2020 21:24:18 -0500 Subject: [PATCH 024/460] allow for all valid linearization orders in LexSchedule/map creation test --- test/test_linearization_checker.py | 187 +++++++++++++++-------------- 1 file changed, 97 insertions(+), 90 deletions(-) diff --git a/test/test_linearization_checker.py b/test/test_linearization_checker.py index 68688f0df..c112b40ae 100644 --- a/test/test_linearization_checker.py +++ b/test/test_linearization_checker.py @@ -197,43 +197,53 @@ def test_lexschedule_and_islmap_creation(): # ------------------------------------------------------------------------------ # Relationship between insn_a and insn_d --------------------------------------- - assert lex_sched_ad.stmt_instance_before.lex_pt == [0, 'i', 0, 'k', 0] - assert lex_sched_ad.stmt_instance_after.lex_pt == [1, 't', 0, 0, 0] + # insn_a and insn_d could have been linearized in either order + # (i loop could be before or after t loop) + def perform_insn_ad_checks_with(sid_a, sid_d): + assert lex_sched_ad.stmt_instance_before.lex_pt == [sid_a, 'i', 0, 'k', 0] + assert lex_sched_ad.stmt_instance_after.lex_pt == [sid_d, 't', 0, 0, 0] - # Get two isl maps representing the LexSchedule + # 
Get two isl maps representing the LexSchedule - isl_sched_map_before, isl_sched_map_after = \ - get_isl_maps_for_LexSchedule(lex_sched_ad, knl, "insn_a", "insn_d") + isl_sched_map_before, isl_sched_map_after = \ + get_isl_maps_for_LexSchedule(lex_sched_ad, knl, "insn_a", "insn_d") - # Create expected maps, align, compare + # Create expected maps, align, compare - isl_sched_map_before_expected = isl.Map( - "[pi, pk] -> { " - "[statement = 0, i, k] -> [l0 = 0, l1 = i, l2 = 0, l3 = k, l4 = 0] : " - "0 <= i < pi and 0 <= k < pk }" - ) - isl_sched_map_before_expected = align_isl_maps_by_var_names( - isl_sched_map_before_expected, isl_sched_map_before) + isl_sched_map_before_expected = isl.Map( + "[pi, pk] -> { " + "[statement = %d, i, k] -> [l0 = %d, l1 = i, l2 = 0, l3 = k, l4 = 0] : " + "0 <= i < pi and 0 <= k < pk }" + % (sid_a, sid_a) + ) + isl_sched_map_before_expected = align_isl_maps_by_var_names( + isl_sched_map_before_expected, isl_sched_map_before) - isl_sched_map_after_expected = isl.Map( - "[pt] -> { " - "[statement = 1, t] -> [l0 = 1, l1 = t, l2 = 0, l3 = 0, l4 = 0] : " - "0 <= t < pt }" - ) - isl_sched_map_after_expected = align_isl_maps_by_var_names( - isl_sched_map_after_expected, isl_sched_map_after) + isl_sched_map_after_expected = isl.Map( + "[pt] -> { " + "[statement = %d, t] -> [l0 = %d, l1 = t, l2 = 0, l3 = 0, l4 = 0] : " + "0 <= t < pt }" + % (sid_d, sid_d) + ) + isl_sched_map_after_expected = align_isl_maps_by_var_names( + isl_sched_map_after_expected, isl_sched_map_after) - assert isl_sched_map_before == isl_sched_map_before_expected - assert isl_sched_map_after == isl_sched_map_after_expected + assert isl_sched_map_before == isl_sched_map_before_expected + assert isl_sched_map_after == isl_sched_map_after_expected + + if lex_sched_ad.stmt_instance_before.stmt.int_id == 0: + perform_insn_ad_checks_with(0, 1) + else: + perform_insn_ad_checks_with(1, 0) # ------------------------------------------------------------------------------ # Relationship 
between insn_b and insn_c --------------------------------------- # insn_b and insn_c could have been linearized in either order - if lex_sched_bc.stmt_instance_before.stmt.int_id == 0: - # insn_c comes first - assert lex_sched_bc.stmt_instance_before.lex_pt == [0, 'i', 0, 'j', 0] - assert lex_sched_bc.stmt_instance_after.lex_pt == [0, 'i', 0, 'j', 1] + # (i loop could be before or after t loop) + def perform_insn_bc_checks_with(sid_b, sid_c): + assert lex_sched_bc.stmt_instance_before.lex_pt == [0, 'i', 0, 'j', sid_b] + assert lex_sched_bc.stmt_instance_after.lex_pt == [0, 'i', 0, 'j', sid_c] # Get two isl maps representing the LexSchedule @@ -244,46 +254,60 @@ def test_lexschedule_and_islmap_creation(): isl_sched_map_before_expected = isl.Map( "[pi, pj] -> { " - "[statement = 0, i, j] -> [l0 = 0, l1 = i, l2 = 0, l3 = j, l4 = 1] : " + "[statement = %d, i, j] -> [l0 = 0, l1 = i, l2 = 0, l3 = j, l4 = %d] : " "0 <= i < pi and 0 <= j < pj }" + % (sid_b, sid_b) ) isl_sched_map_before_expected = align_isl_maps_by_var_names( isl_sched_map_before_expected, isl_sched_map_before) isl_sched_map_after_expected = isl.Map( "[pi, pj] -> { " - "[statement = 1, i, j] -> [l0 = 0, l1 = i, l2 = 0, l3 = j, l4 = 0] : " + "[statement = %d, i, j] -> [l0 = 0, l1 = i, l2 = 0, l3 = j, l4 = %d] : " "0 <= i < pi and 0 <= j < pj }" + % (sid_c, sid_c) ) isl_sched_map_after_expected = align_isl_maps_by_var_names( isl_sched_map_after_expected, isl_sched_map_after) assert isl_sched_map_before == isl_sched_map_before_expected assert isl_sched_map_after == isl_sched_map_after_expected - elif lex_sched_bc.stmt_instance_before.stmt.int_id == 1: - # insn_c comes first - assert lex_sched_bc.stmt_instance_before.lex_pt == [0, 'i', 0, 'j', 1] - assert lex_sched_bc.stmt_instance_after.lex_pt == [0, 'i', 0, 'j', 0] + + if lex_sched_bc.stmt_instance_before.stmt.int_id == 0: + perform_insn_bc_checks_with(0, 1) + else: + perform_insn_bc_checks_with(1, 0) + + # 
------------------------------------------------------------------------------ + # Relationship between insn_b and insn_d --------------------------------------- + + # insn_b and insn_d could have been linearized in either order + # (i loop could be before or after t loop) + def perform_insn_bd_checks_with(sid_b, sid_d): + assert lex_sched_bd.stmt_instance_before.lex_pt == [sid_b, 'i', 0, 'j', 0] + assert lex_sched_bd.stmt_instance_after.lex_pt == [sid_d, 't', 0, 0, 0] # Get two isl maps representing the LexSchedule isl_sched_map_before, isl_sched_map_after = \ - get_isl_maps_for_LexSchedule(lex_sched_bc, knl, "insn_b", "insn_c") + get_isl_maps_for_LexSchedule(lex_sched_bd, knl, "insn_b", "insn_d") # Create expected maps, align, compare isl_sched_map_before_expected = isl.Map( "[pi, pj] -> { " - "[statement = 1, i, j] -> [l0 = 0, l1 = i, l2 = 0, l3 = j, l4 = 1] : " + "[statement = %d, i, j] -> [l0 = %d, l1 = i, l2 = 0, l3 = j, l4 = 0] : " "0 <= i < pi and 0 <= j < pj }" + % (sid_b, sid_b) ) isl_sched_map_before_expected = align_isl_maps_by_var_names( isl_sched_map_before_expected, isl_sched_map_before) isl_sched_map_after_expected = isl.Map( - "[pi, pj] -> { " - "[statement = 0, i, j] -> [l0 = 0, l1 = i, l2 = 0, l3 = j, l4 = 0] : " - "0 <= i < pi and 0 <= j < pj }" + "[pt] -> { " + "[statement = %d, t] -> [l0 = %d, l1 = t, l2 = 0, l3 = 0, l4 = 0] : " + "0 <= t < pt }" + % (sid_d, sid_d) ) isl_sched_map_after_expected = align_isl_maps_by_var_names( isl_sched_map_after_expected, isl_sched_map_after) @@ -291,69 +315,52 @@ def test_lexschedule_and_islmap_creation(): assert isl_sched_map_before == isl_sched_map_before_expected assert isl_sched_map_after == isl_sched_map_after_expected - # ------------------------------------------------------------------------------ - # Relationship between insn_b and insn_d --------------------------------------- - - assert lex_sched_bd.stmt_instance_before.lex_pt == [0, 'i', 0, 'j', 0] - assert lex_sched_bd.stmt_instance_after.lex_pt 
== [1, 't', 0, 0, 0] - - # Get two isl maps representing the LexSchedule - - isl_sched_map_before, isl_sched_map_after = \ - get_isl_maps_for_LexSchedule(lex_sched_bd, knl, "insn_b", "insn_d") - - # Create expected maps, align, compare - - isl_sched_map_before_expected = isl.Map( - "[pi, pj] -> { " - "[statement = 0, i, j] -> [l0 = 0, l1 = i, l2 = 0, l3 = j, l4 = 0] : " - "0 <= i < pi and 0 <= j < pj }" - ) - isl_sched_map_before_expected = align_isl_maps_by_var_names( - isl_sched_map_before_expected, isl_sched_map_before) - - isl_sched_map_after_expected = isl.Map( - "[pt] -> { " - "[statement = 1, t] -> [l0 = 1, l1 = t, l2 = 0, l3 = 0, l4 = 0] : " - "0 <= t < pt }" - ) - isl_sched_map_after_expected = align_isl_maps_by_var_names( - isl_sched_map_after_expected, isl_sched_map_after) - - assert isl_sched_map_before == isl_sched_map_before_expected - assert isl_sched_map_after == isl_sched_map_after_expected + if lex_sched_bd.stmt_instance_before.stmt.int_id == 0: + perform_insn_bd_checks_with(0, 1) + else: + perform_insn_bd_checks_with(1, 0) # ------------------------------------------------------------------------------ # Relationship between insn_c and insn_d --------------------------------------- - assert lex_sched_cd.stmt_instance_before.lex_pt == [0, 'i', 0, 'j', 0] - assert lex_sched_cd.stmt_instance_after.lex_pt == [1, 't', 0, 0, 0] + # insn_c and insn_d could have been linearized in either order + # (i loop could be before or after t loop) + def perform_insn_cd_checks_with(sid_c, sid_d): + assert lex_sched_cd.stmt_instance_before.lex_pt == [sid_c, 'i', 0, 'j', 0] + assert lex_sched_cd.stmt_instance_after.lex_pt == [sid_d, 't', 0, 0, 0] - # Get two isl maps representing the LexSchedule + # Get two isl maps representing the LexSchedule - isl_sched_map_before, isl_sched_map_after = \ - get_isl_maps_for_LexSchedule(lex_sched_cd, knl, "insn_c", "insn_d") + isl_sched_map_before, isl_sched_map_after = \ + get_isl_maps_for_LexSchedule(lex_sched_cd, knl, "insn_c", 
"insn_d") - # Create expected maps, align, compare + # Create expected maps, align, compare - isl_sched_map_before_expected = isl.Map( - "[pi, pj] -> { " - "[statement = 0, i, j] -> [l0 = 0, l1 = i, l2 = 0, l3 = j, l4 = 0] : " - "0 <= i < pi and 0 <= j < pj }" - ) - isl_sched_map_before_expected = align_isl_maps_by_var_names( - isl_sched_map_before_expected, isl_sched_map_before) + isl_sched_map_before_expected = isl.Map( + "[pi, pj] -> { " + "[statement = %d, i, j] -> [l0 = %d, l1 = i, l2 = 0, l3 = j, l4 = 0] : " + "0 <= i < pi and 0 <= j < pj }" + % (sid_c, sid_c) + ) + isl_sched_map_before_expected = align_isl_maps_by_var_names( + isl_sched_map_before_expected, isl_sched_map_before) - isl_sched_map_after_expected = isl.Map( - "[pt] -> { " - "[statement = 1, t] -> [l0 = 1, l1 = t, l2 = 0, l3 = 0, l4 = 0] : " - "0 <= t < pt }" - ) - isl_sched_map_after_expected = align_isl_maps_by_var_names( - isl_sched_map_after_expected, isl_sched_map_after) + isl_sched_map_after_expected = isl.Map( + "[pt] -> { " + "[statement = %d, t] -> [l0 = %d, l1 = t, l2 = 0, l3 = 0, l4 = 0] : " + "0 <= t < pt }" + % (sid_d, sid_d) + ) + isl_sched_map_after_expected = align_isl_maps_by_var_names( + isl_sched_map_after_expected, isl_sched_map_after) - assert isl_sched_map_before == isl_sched_map_before_expected - assert isl_sched_map_after == isl_sched_map_after_expected + assert isl_sched_map_before == isl_sched_map_before_expected + assert isl_sched_map_after == isl_sched_map_after_expected + + if lex_sched_cd.stmt_instance_before.stmt.int_id == 0: + perform_insn_cd_checks_with(0, 1) + else: + perform_insn_cd_checks_with(1, 0) if __name__ == "__main__": From 45b29f7a3c97760ccba104e6e189522b1587b552 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Mon, 20 Apr 2020 20:34:04 -0500 Subject: [PATCH 025/460] adding lexicographic_order_map.py (creates isl maps defining lex orderings and statement instance orderings) --- .../checker/lexicographic_order_map.py | 159 ++++++++++++++++++ 1 file changed, 
159 insertions(+) create mode 100644 loopy/schedule/checker/lexicographic_order_map.py diff --git a/loopy/schedule/checker/lexicographic_order_map.py b/loopy/schedule/checker/lexicographic_order_map.py new file mode 100644 index 000000000..2e063e7d7 --- /dev/null +++ b/loopy/schedule/checker/lexicographic_order_map.py @@ -0,0 +1,159 @@ +__copyright__ = "Copyright (C) 2019 James Stevens" + +__license__ = """ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +""" + +import islpy as isl + + +def get_statement_ordering_map( + sched_map_before, sched_map_after, lex_map, before_marker="'"): + """Return a mapping that maps each statement instance to + all statement instances occuring later. + + :arg sched_map_before: An :class:`islpy.Map` representing instruction + instance order for the dependee as a mapping from each statement + instance to a point in the lexicographic ordering. 
+ + :arg sched_map_after: An :class:`islpy.Map` representing instruction + instance order for the depender as a mapping from each statement + instance to a point in the lexicographic ordering. + + :arg lex_map: An :class:`islpy.Map` representing a lexicographic + ordering as a mapping from each point in lexicographic time + to every point that occurs later in lexicographic time. E.g.:: + + {[i0', i1', i2', ...] -> [i0, i1, i2, ...] : + i0' < i0 or (i0' = i0 and i1' < i1) + or (i0' = i0 and i1' = i1 and i2' < i2) ...} + + :returns: An :class:`islpy.Map` representing the lex schedule as + a mapping from each statement instance to all statement instances + occuring later. I.e., we compose B -> L -> A^-1, where B + is sched_map_before, A is sched_map_after, and L is the + lexicographic ordering map. + + """ + + sio = sched_map_before.apply_range( + lex_map).apply_range(sched_map_after.reverse()) + # append marker to in names + for i in range(sio.dim(isl.dim_type.in_)): + sio = sio.set_dim_name(isl.dim_type.in_, i, sio.get_dim_name( + isl.dim_type.in_, i)+before_marker) + return sio + + +def get_lex_order_constraint(islvars, before_names, after_names): + """Return a constraint represented as an :class:`islpy.Set` + defining a 'happens before' relationship in a lexicographic + ordering. + + :arg islvars: A dictionary from variable names to :class:`islpy.PwAff` + instances that represent each of the variables + (islvars may be produced by `islpy.make_zero_and_vars`). The key + '0' is also include and represents a :class:`islpy.PwAff` zero constant. + This dictionary defines the space to be used for the set. + + :arg before_names: A list of :class:`str` variable names representing + the lexicographic space dimensions for a point in lexicographic + time that occurs before. (see example below) + + :arg after_names: A list of :class:`str` variable names representing + the lexicographic space dimensions for a point in lexicographic + time that occurs after. 
(see example below) + + :returns: An :class:`islpy.Set` representing a constraint that enforces a + lexicographic ordering. E.g., if ``before_names = [i0', i1', i2']`` and + ``after_names = [i0, i1, i2]``, return the set:: + + {[i0', i1', i2', i0, i1, i2] : + i0' < i0 or (i0' = i0 and i1' < i1) + or (i0' = i0 and i1' = i1 and i2' < i2)} + + """ + + lex_order_constraint = islvars[before_names[0]].lt_set(islvars[after_names[0]]) + for i in range(1, len(before_names)): + lex_order_constraint_conj = islvars[before_names[i]].lt_set( + islvars[after_names[i]]) + for j in range(i): + lex_order_constraint_conj = lex_order_constraint_conj & \ + islvars[before_names[j]].eq_set(islvars[after_names[j]]) + lex_order_constraint = lex_order_constraint | lex_order_constraint_conj + return lex_order_constraint + + +def create_lex_order_map( + n_dims, + before_names=None, + after_names=None, + ): + """Return a mapping that maps each point in a lexicographic + ordering to every point that occurs later in lexicographic + time. + + :arg n_dims: An :class:`int` representing the number of dimensions + in the lexicographic ordering. + + :arg before_names: A list of :class:`str` variable names representing + the lexicographic space dimensions for a point in lexicographic + time that occurs before. (see example below) + + :arg after_names: A list of :class:`str` variable names representing + the lexicographic space dimensions for a point in lexicographic + time that occurs after. (see example below) + + :returns: An :class:`islpy.Map` representing a lexicographic + ordering as a mapping from each point in lexicographic time + to every point that occurs later in lexicographic time. 
+ E.g., if ``before_names = [i0', i1', i2']`` and + ``after_names = [i0, i1, i2]``, return the map:: + + {[i0', i1', i2'] -> [i0, i1, i2] : + i0' < i0 or (i0' = i0 and i1' < i1) + or (i0' = i0 and i1' = i1 and i2' < i2)} + + """ + + if before_names is None: + before_names = ["i%s" % (i) for i in range(n_dims)] + if after_names is None: + from loopy.schedule.checker.utils import ( + append_marker_to_strings, + ) + after_names = append_marker_to_strings(before_names, marker="_") + + assert len(before_names) == len(after_names) == n_dims + dim_type = isl.dim_type + + islvars = isl.make_zero_and_vars( + before_names+after_names, + []) + + lex_order_constraint = get_lex_order_constraint( + islvars, before_names, after_names) + + lex_map = isl.Map.from_domain(lex_order_constraint) + lex_map = lex_map.move_dims( + dim_type.out, 0, dim_type.in_, + len(before_names), len(after_names)) + + return lex_map From 782dde2330328a0716bda113efc1526257c3fcbe Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Mon, 20 Apr 2020 20:35:41 -0500 Subject: [PATCH 026/460] add get_lex_order_map_for_sched_space() to schedule (gets an isl map defining the lexicographic ordering) --- loopy/schedule/checker/schedule.py | 13 +++++++++++++ loopy/schedule/checker/utils.py | 7 +++++++ 2 files changed, 20 insertions(+) diff --git a/loopy/schedule/checker/schedule.py b/loopy/schedule/checker/schedule.py index 0aca588c3..305d1f74f 100644 --- a/loopy/schedule/checker/schedule.py +++ b/loopy/schedule/checker/schedule.py @@ -405,6 +405,19 @@ def get_lex_var_names(self): return [self.lex_var_prefix+str(i) for i in range(self.max_lex_dims())] + def get_lex_order_map_for_sched_space(self): + """Return an :class:`islpy.BasicMap` that maps each point in a + lexicographic ordering to every point that is + lexocigraphically greater. 
+ """ + + from loopy.schedule.checker.lexicographic_order_map import ( + create_lex_order_map, + ) + n_dims = self.max_lex_dims() + return create_lex_order_map( + n_dims, before_names=self.get_lex_var_names()) + def __str__(self): def stringify_sched_stmt_instance(stmt_inst): diff --git a/loopy/schedule/checker/utils.py b/loopy/schedule/checker/utils.py index cb933de6f..8757406b7 100644 --- a/loopy/schedule/checker/utils.py +++ b/loopy/schedule/checker/utils.py @@ -143,6 +143,13 @@ def align_isl_maps_by_var_names(input_map, target_map): return aligned_input_map +def append_marker_to_strings(strings, marker="'"): + if not isinstance(strings, list): + raise ValueError("append_marker_to_strings did not receive a list") + else: + return [s+marker for s in strings] + + def _union_of_isl_sets_or_maps(set_list): union = set_list[0] for s in set_list[1:]: From 0e664550837299ff697d5f6947fed9d90d2cc095 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Mon, 20 Apr 2020 22:13:50 -0500 Subject: [PATCH 027/460] add function append_marker_to_in_dim_names(islmap) --- loopy/schedule/checker/lexicographic_order_map.py | 8 ++++---- loopy/schedule/checker/utils.py | 8 ++++++++ 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/loopy/schedule/checker/lexicographic_order_map.py b/loopy/schedule/checker/lexicographic_order_map.py index 2e063e7d7..61f191247 100644 --- a/loopy/schedule/checker/lexicographic_order_map.py +++ b/loopy/schedule/checker/lexicographic_order_map.py @@ -55,10 +55,10 @@ def get_statement_ordering_map( sio = sched_map_before.apply_range( lex_map).apply_range(sched_map_after.reverse()) # append marker to in names - for i in range(sio.dim(isl.dim_type.in_)): - sio = sio.set_dim_name(isl.dim_type.in_, i, sio.get_dim_name( - isl.dim_type.in_, i)+before_marker) - return sio + from loopy.schedule.checker.utils import ( + append_marker_to_in_dim_names, + ) + return append_marker_to_in_dim_names(sio, before_marker) def get_lex_order_constraint(islvars, before_names, 
after_names): diff --git a/loopy/schedule/checker/utils.py b/loopy/schedule/checker/utils.py index 8757406b7..96aa007c7 100644 --- a/loopy/schedule/checker/utils.py +++ b/loopy/schedule/checker/utils.py @@ -150,6 +150,14 @@ def append_marker_to_strings(strings, marker="'"): return [s+marker for s in strings] +def append_marker_to_in_dim_names(islmap, marker="'"): + # append marker to in names + for i in range(islmap.dim(isl.dim_type.in_)): + islmap = islmap.set_dim_name(isl.dim_type.in_, i, islmap.get_dim_name( + isl.dim_type.in_, i)+marker) + return islmap + + def _union_of_isl_sets_or_maps(set_list): union = set_list[0] for s in set_list[1:]: From ceb9015a1a18d16f0615c8f3deb9cf35f0cb9ca2 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Mon, 20 Apr 2020 22:14:38 -0500 Subject: [PATCH 028/460] test lexicographic order map creation and statement instance order creation --- test/test_linearization_checker.py | 203 +++++++++++++++++++++++++++++ 1 file changed, 203 insertions(+) diff --git a/test/test_linearization_checker.py b/test/test_linearization_checker.py index c112b40ae..5a05bdd8e 100644 --- a/test/test_linearization_checker.py +++ b/test/test_linearization_checker.py @@ -46,6 +46,8 @@ faulthandler.enable() +# {{{ test LexSchedule and isl map creation + def test_lexschedule_and_islmap_creation(): import islpy as isl from loopy.schedule.checker import ( @@ -362,6 +364,207 @@ def perform_insn_cd_checks_with(sid_c, sid_d): else: perform_insn_cd_checks_with(1, 0) +# }}} + + +# {{{ test statement instance ordering creation + +def test_statement_instance_ordering_creation(): + import islpy as isl + from loopy.schedule.checker import ( + get_schedule_for_statement_pair, + get_isl_maps_for_LexSchedule, + ) + from loopy.schedule.checker.utils import ( + align_isl_maps_by_var_names, + append_marker_to_in_dim_names, + ) + from loopy.schedule.checker.lexicographic_order_map import ( + get_statement_ordering_map, + ) + + # example kernel (add deps to fix loop order) + knl = 
lp.make_kernel( + [ + "{[i]: 0<=itemp = b[i,k] {id=insn_a} + end + for j + a[i,j] = temp + 1 {id=insn_b,dep=insn_a} + c[i,j] = d[i,j] {id=insn_c,dep=insn_b} + end + end + for t + e[t] = f[t] {id=insn_d, dep=insn_c} + end + """, + name="example", + assumptions="pi,pj,pk,pt >= 1", + lang_version=(2018, 2) + ) + knl = lp.add_and_infer_dtypes( + knl, + {"b": np.float32, "d": np.float32, "f": np.float32}) + knl = lp.prioritize_loops(knl, "i,k") + knl = lp.prioritize_loops(knl, "i,j") + + # get a linearization + knl = preprocess_kernel(knl) + knl = get_one_linearized_kernel(knl) + linearization_items = knl.linearization + + def check_sio_for_insn_pair( + insn_id_before, + insn_id_after, + expected_lex_order_map, + expected_sio, + ): + + lex_sched = get_schedule_for_statement_pair( + knl, + linearization_items, + insn_id_before, + insn_id_after, + ) + + # Get two isl maps representing the LexSchedule + isl_sched_map_before, isl_sched_map_after = \ + get_isl_maps_for_LexSchedule(lex_sched, knl, insn_id_before, insn_id_after) + + # get map representing lexicographic ordering + sched_lex_order_map = lex_sched.get_lex_order_map_for_sched_space() + + assert sched_lex_order_map == expected_lex_order_map + + # create statement instance ordering, + # maps each statement instance to all statement instances occuring later + sio = get_statement_ordering_map( + isl_sched_map_before, + isl_sched_map_after, + sched_lex_order_map, + ) + + print(sio) + print(expected_sio) + + sio_aligned = align_isl_maps_by_var_names(sio, expected_sio) + + print(sio_aligned) + print(expected_sio) + + assert sio_aligned == expected_sio + + expected_lex_order_map = isl.Map( + "{ " + "[l0, l1, l2, l3, l4] -> [l0_, l1_, l2_, l3_, l4_] : l0_ > l0; " + "[l0, l1, l2, l3, l4] -> [l0_= l0, l1_, l2_, l3_, l4_] : l1_ > l1; " + "[l0, l1, l2, l3, l4] -> [l0_= l0, l1_= l1, l2_, l3_, l4_] : l2_ > l2; " + "[l0, l1, l2, l3, l4] -> [l0_= l0, l1_= l1, l2_= l2, l3_, l4_] : l3_ > l3; " + "[l0, l1, l2, l3, l4] -> [l0_= l0, 
l1_= l1, l2_= l2, l3_= l3, l4_] : l4_ > l4 " + "}" + ) + + # Relationship between insn_a and insn_b --------------------------------------- + + expected_sio = isl.Map( + "[pi, pj, pk] -> { " + "[statement' = 0, i', k'] -> [statement = 1, i, j] : " + "0 <= i' < pi and 0 <= k' < pk and 0 <= j < pj and 0 <= i < pi and i > i'; " + "[statement' = 0, i', k'] -> [statement = 1, i = i', j] : " + "0 <= i' < pi and 0 <= k' < pk and 0 <= j < pj " + "}" + ) + # isl ignores these apostrophes, so explicitly add them + expected_sio = append_marker_to_in_dim_names(expected_sio, "'") + + check_sio_for_insn_pair( + "insn_a", "insn_b", expected_lex_order_map, expected_sio) + + # Relationship between insn_a and insn_c --------------------------------------- + + expected_sio = isl.Map( + "[pi, pj, pk] -> { " + "[statement' = 0, i', k'] -> [statement = 1, i, j] : " + "0 <= i' < pi and 0 <= k' < pk and 0 <= j < pj and 0 <= i < pi and i > i'; " + "[statement' = 0, i', k'] -> [statement = 1, i = i', j] : " + "0 <= i' < pi and 0 <= k' < pk and 0 <= j < pj " + "}" + ) + # isl ignores these apostrophes, so explicitly add them + expected_sio = append_marker_to_in_dim_names(expected_sio, "'") + + check_sio_for_insn_pair( + "insn_a", "insn_c", expected_lex_order_map, expected_sio) + + # Relationship between insn_a and insn_d --------------------------------------- + + expected_sio = isl.Map( + "[pt, pi, pk] -> { " + "[statement' = 0, i', k'] -> [statement = 1, t] : " + "0 <= i' < pi and 0 <= k' < pk and 0 <= t < pt " + "}" + ) + # isl ignores these apostrophes, so explicitly add them + expected_sio = append_marker_to_in_dim_names(expected_sio, "'") + + check_sio_for_insn_pair( + "insn_a", "insn_d", expected_lex_order_map, expected_sio) + + # Relationship between insn_b and insn_c --------------------------------------- + + expected_sio = isl.Map( + "[pi, pj] -> { " + "[statement' = 0, i', j'] -> [statement = 1, i, j] : " + "0 <= i' < pi and 0 <= j' < pj and i > i' and 0 <= i < pi and 0 <= j < 
pj; " + "[statement' = 0, i', j'] -> [statement = 1, i = i', j] : " + "0 <= i' < pi and 0 <= j' < pj and j > j' and 0 <= j < pj; " + "[statement' = 0, i', j'] -> [statement = 1, i = i', j = j'] : " + "0 <= i' < pi and 0 <= j' < pj " + "}" + ) + # isl ignores these apostrophes, so explicitly add them + expected_sio = append_marker_to_in_dim_names(expected_sio, "'") + + check_sio_for_insn_pair( + "insn_b", "insn_c", expected_lex_order_map, expected_sio) + + # Relationship between insn_b and insn_d --------------------------------------- + + expected_sio = isl.Map( + "[pt, pi, pj] -> { " + "[statement' = 0, i', j'] -> [statement = 1, t] : " + "0 <= i' < pi and 0 <= j' < pj and 0 <= t < pt " + "}" + ) + # isl ignores these apostrophes, so explicitly add them + expected_sio = append_marker_to_in_dim_names(expected_sio, "'") + + check_sio_for_insn_pair( + "insn_b", "insn_d", expected_lex_order_map, expected_sio) + + # Relationship between insn_c and insn_d --------------------------------------- + + expected_sio = isl.Map( + "[pt, pi, pj] -> { " + "[statement' = 0, i', j'] -> [statement = 1, t] : " + "0 <= i' < pi and 0 <= j' < pj and 0 <= t < pt " + "}" + ) + # isl ignores these apostrophes, so explicitly add them + expected_sio = append_marker_to_in_dim_names(expected_sio, "'") + + check_sio_for_insn_pair( + "insn_c", "insn_d", expected_lex_order_map, expected_sio) + +# }}} + if __name__ == "__main__": if len(sys.argv) > 1: From 6f109f979f39a4ab2cc7839ea582b1457c538ac6 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Mon, 20 Apr 2020 22:28:38 -0500 Subject: [PATCH 029/460] fixing flake8 issues --- test/test_linearization_checker.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/test/test_linearization_checker.py b/test/test_linearization_checker.py index 5a05bdd8e..52145915d 100644 --- a/test/test_linearization_checker.py +++ b/test/test_linearization_checker.py @@ -435,8 +435,8 @@ def check_sio_for_insn_pair( ) # Get two isl maps 
representing the LexSchedule - isl_sched_map_before, isl_sched_map_after = \ - get_isl_maps_for_LexSchedule(lex_sched, knl, insn_id_before, insn_id_after) + isl_sched_map_before, isl_sched_map_after = get_isl_maps_for_LexSchedule( + lex_sched, knl, insn_id_before, insn_id_after) # get map representing lexicographic ordering sched_lex_order_map = lex_sched.get_lex_order_map_for_sched_space() @@ -463,11 +463,11 @@ def check_sio_for_insn_pair( expected_lex_order_map = isl.Map( "{ " - "[l0, l1, l2, l3, l4] -> [l0_, l1_, l2_, l3_, l4_] : l0_ > l0; " - "[l0, l1, l2, l3, l4] -> [l0_= l0, l1_, l2_, l3_, l4_] : l1_ > l1; " - "[l0, l1, l2, l3, l4] -> [l0_= l0, l1_= l1, l2_, l3_, l4_] : l2_ > l2; " - "[l0, l1, l2, l3, l4] -> [l0_= l0, l1_= l1, l2_= l2, l3_, l4_] : l3_ > l3; " - "[l0, l1, l2, l3, l4] -> [l0_= l0, l1_= l1, l2_= l2, l3_= l3, l4_] : l4_ > l4 " + "[l0, l1, l2, l3, l4] -> [l0_, l1_, l2_, l3_, l4_]: l0_ > l0; " + "[l0, l1, l2, l3, l4] -> [l0_= l0, l1_, l2_, l3_, l4_]: l1_ > l1; " + "[l0, l1, l2, l3, l4] -> [l0_= l0, l1_= l1, l2_, l3_, l4_]: l2_ > l2; " + "[l0, l1, l2, l3, l4] -> [l0_= l0, l1_= l1, l2_= l2, l3_, l4_]: l3_ > l3; " + "[l0, l1, l2, l3, l4] -> [l0_= l0, l1_= l1, l2_= l2, l3_= l3, l4_]: l4_ > l4" "}" ) From ae7f906a83159796f0ae21929f7dd8d08d518279 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Tue, 21 Apr 2020 03:57:15 -0500 Subject: [PATCH 030/460] replace append_marker_to_in_dim_names() with more generic append_marker_to_isl_map_var_names() that allows dim specification --- .../checker/lexicographic_order_map.py | 5 ++-- loopy/schedule/checker/utils.py | 29 ++++++++++++++----- test/test_linearization_checker.py | 20 ++++++++----- 3 files changed, 37 insertions(+), 17 deletions(-) diff --git a/loopy/schedule/checker/lexicographic_order_map.py b/loopy/schedule/checker/lexicographic_order_map.py index 61f191247..ddc320ed9 100644 --- a/loopy/schedule/checker/lexicographic_order_map.py +++ b/loopy/schedule/checker/lexicographic_order_map.py @@ -56,9 +56,10 @@ 
def get_statement_ordering_map( lex_map).apply_range(sched_map_after.reverse()) # append marker to in names from loopy.schedule.checker.utils import ( - append_marker_to_in_dim_names, + append_marker_to_isl_map_var_names, ) - return append_marker_to_in_dim_names(sio, before_marker) + return append_marker_to_isl_map_var_names( + sio, isl.dim_type.in_, before_marker) def get_lex_order_constraint(islvars, before_names, after_names): diff --git a/loopy/schedule/checker/utils.py b/loopy/schedule/checker/utils.py index 96aa007c7..46c33ed3b 100644 --- a/loopy/schedule/checker/utils.py +++ b/loopy/schedule/checker/utils.py @@ -143,6 +143,27 @@ def align_isl_maps_by_var_names(input_map, target_map): return aligned_input_map +def append_marker_to_isl_map_var_names(old_isl_map, dim_type, marker="'"): + """Return an isl_map with marker appended to + dim_type dimension names. + + :arg old_isl_map: A :class:`islpy.Map`. + + :arg dim_type: A :class:`islpy.dim_type`, i.e., an :class:`int`, + specifying the dimension to be marked. + + :returns: A :class:`islpy.Map` matching `old_isl_map` with + apostrophes appended to dim_type dimension names. 
+ + """ + + new_map = old_isl_map.copy() + for i in range(len(old_isl_map.get_var_names(dim_type))): + new_map = new_map.set_dim_name(dim_type, i, old_isl_map.get_dim_name( + dim_type, i)+marker) + return new_map + + def append_marker_to_strings(strings, marker="'"): if not isinstance(strings, list): raise ValueError("append_marker_to_strings did not receive a list") @@ -150,14 +171,6 @@ def append_marker_to_strings(strings, marker="'"): return [s+marker for s in strings] -def append_marker_to_in_dim_names(islmap, marker="'"): - # append marker to in names - for i in range(islmap.dim(isl.dim_type.in_)): - islmap = islmap.set_dim_name(isl.dim_type.in_, i, islmap.get_dim_name( - isl.dim_type.in_, i)+marker) - return islmap - - def _union_of_isl_sets_or_maps(set_list): union = set_list[0] for s in set_list[1:]: diff --git a/test/test_linearization_checker.py b/test/test_linearization_checker.py index 52145915d..a15d48d1c 100644 --- a/test/test_linearization_checker.py +++ b/test/test_linearization_checker.py @@ -377,7 +377,7 @@ def test_statement_instance_ordering_creation(): ) from loopy.schedule.checker.utils import ( align_isl_maps_by_var_names, - append_marker_to_in_dim_names, + append_marker_to_isl_map_var_names, ) from loopy.schedule.checker.lexicographic_order_map import ( get_statement_ordering_map, @@ -482,7 +482,8 @@ def check_sio_for_insn_pair( "}" ) # isl ignores these apostrophes, so explicitly add them - expected_sio = append_marker_to_in_dim_names(expected_sio, "'") + expected_sio = append_marker_to_isl_map_var_names( + expected_sio, isl.dim_type.in_, "'") check_sio_for_insn_pair( "insn_a", "insn_b", expected_lex_order_map, expected_sio) @@ -498,7 +499,8 @@ def check_sio_for_insn_pair( "}" ) # isl ignores these apostrophes, so explicitly add them - expected_sio = append_marker_to_in_dim_names(expected_sio, "'") + expected_sio = append_marker_to_isl_map_var_names( + expected_sio, isl.dim_type.in_, "'") check_sio_for_insn_pair( "insn_a", "insn_c", 
expected_lex_order_map, expected_sio) @@ -512,7 +514,8 @@ def check_sio_for_insn_pair( "}" ) # isl ignores these apostrophes, so explicitly add them - expected_sio = append_marker_to_in_dim_names(expected_sio, "'") + expected_sio = append_marker_to_isl_map_var_names( + expected_sio, isl.dim_type.in_, "'") check_sio_for_insn_pair( "insn_a", "insn_d", expected_lex_order_map, expected_sio) @@ -530,7 +533,8 @@ def check_sio_for_insn_pair( "}" ) # isl ignores these apostrophes, so explicitly add them - expected_sio = append_marker_to_in_dim_names(expected_sio, "'") + expected_sio = append_marker_to_isl_map_var_names( + expected_sio, isl.dim_type.in_, "'") check_sio_for_insn_pair( "insn_b", "insn_c", expected_lex_order_map, expected_sio) @@ -544,7 +548,8 @@ def check_sio_for_insn_pair( "}" ) # isl ignores these apostrophes, so explicitly add them - expected_sio = append_marker_to_in_dim_names(expected_sio, "'") + expected_sio = append_marker_to_isl_map_var_names( + expected_sio, isl.dim_type.in_, "'") check_sio_for_insn_pair( "insn_b", "insn_d", expected_lex_order_map, expected_sio) @@ -558,7 +563,8 @@ def check_sio_for_insn_pair( "}" ) # isl ignores these apostrophes, so explicitly add them - expected_sio = append_marker_to_in_dim_names(expected_sio, "'") + expected_sio = append_marker_to_isl_map_var_names( + expected_sio, isl.dim_type.in_, "'") check_sio_for_insn_pair( "insn_c", "insn_d", expected_lex_order_map, expected_sio) From de3f00ce791dc98c884d6c705351f4bfd9beb9b8 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Sun, 26 Apr 2020 18:49:49 -0500 Subject: [PATCH 031/460] updated LexSchedule documentation --- loopy/schedule/checker/schedule.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/loopy/schedule/checker/schedule.py b/loopy/schedule/checker/schedule.py index 0aca588c3..e8e009169 100644 --- a/loopy/schedule/checker/schedule.py +++ b/loopy/schedule/checker/schedule.py @@ -107,8 +107,10 @@ def __str__(self): class LexSchedule(object): - 
"""A program ordering represented as a mapping from statement - instances to points in a lexicographic ordering. + """Given a pair of statements in a linearized kernel, LexSchedule + determines the (relative) order in which the instances are executed, + by creating a mapping from statement instances to points in a single + lexicographic ordering. .. attribute:: stmt_instance_before From 948ada80d26df2fec69eefb58d985c979524cdb9 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Sun, 26 Apr 2020 18:52:59 -0500 Subject: [PATCH 032/460] simplify+shorten code for getting+returning maps from schedule --- loopy/schedule/checker/__init__.py | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/loopy/schedule/checker/__init__.py b/loopy/schedule/checker/__init__.py index 2911351b2..260864aff 100644 --- a/loopy/schedule/checker/__init__.py +++ b/loopy/schedule/checker/__init__.py @@ -147,13 +147,7 @@ def get_isl_maps_for_LexSchedule( # }}} # {{{ Get isl maps - isl_sched_map_before, isl_sched_map_after = \ - lex_sched.create_isl_maps( - dom_before, - dom_after, - ) + return lex_sched.create_isl_maps(dom_before, dom_after) # }}} - return isl_sched_map_before, isl_sched_map_after - # }}} From 217d480ffc2222e8de0ff01638f4f6f78f51e26b Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Sun, 26 Apr 2020 19:01:30 -0500 Subject: [PATCH 033/460] remove underscores from function names if functions are used in separate module --- loopy/schedule/checker/__init__.py | 4 ++-- loopy/schedule/checker/schedule.py | 4 ++-- loopy/schedule/checker/utils.py | 5 ++--- 3 files changed, 6 insertions(+), 7 deletions(-) diff --git a/loopy/schedule/checker/__init__.py b/loopy/schedule/checker/__init__.py index 260864aff..79fc2e0e8 100644 --- a/loopy/schedule/checker/__init__.py +++ b/loopy/schedule/checker/__init__.py @@ -78,10 +78,10 @@ def get_schedule_for_statement_pair( # Test which exercises this: test_linearization_checker_with_stroud_bernstein()) from loopy.schedule.checker.utils import 
( get_concurrent_inames, - _get_EnterLoop_inames, + get_EnterLoop_inames, ) conc_inames, _ = get_concurrent_inames(preproc_knl) - enterloop_inames = _get_EnterLoop_inames(linearization_items, preproc_knl) + enterloop_inames = get_EnterLoop_inames(linearization_items, preproc_knl) conc_loop_inames = conc_inames & enterloop_inames if conc_loop_inames: from warnings import warn diff --git a/loopy/schedule/checker/schedule.py b/loopy/schedule/checker/schedule.py index e8e009169..e868f5b19 100644 --- a/loopy/schedule/checker/schedule.py +++ b/loopy/schedule/checker/schedule.py @@ -230,9 +230,9 @@ def __init__( stmt_added_since_prev_block_at_tier[-1] = False elif isinstance(linearization_item, (RunInstruction, Barrier)): from loopy.schedule.checker.utils import ( - _get_insn_id_from_linearization_item, + get_insn_id_from_linearization_item, ) - lp_insn_id = _get_insn_id_from_linearization_item(linearization_item) + lp_insn_id = get_insn_id_from_linearization_item(linearization_item) if lp_insn_id is None: # TODO make sure it's okay to ignore barriers without id # (because they'll never be part of a dependency?) 
diff --git a/loopy/schedule/checker/utils.py b/loopy/schedule/checker/utils.py index cb933de6f..b6a5487b0 100644 --- a/loopy/schedule/checker/utils.py +++ b/loopy/schedule/checker/utils.py @@ -310,8 +310,7 @@ def get_concurrent_inames(knl): return conc_inames, all_inames-conc_inames -def _get_insn_id_from_linearization_item(linearization_item): - # TODO could use loopy's sched_item_to_insn_id() +def get_insn_id_from_linearization_item(linearization_item): from loopy.schedule import Barrier if isinstance(linearization_item, Barrier): return linearization_item.originating_insn_id @@ -319,7 +318,7 @@ def _get_insn_id_from_linearization_item(linearization_item): return linearization_item.insn_id -def _get_EnterLoop_inames(linearization_items, knl): +def get_EnterLoop_inames(linearization_items, knl): from loopy.schedule import EnterLoop loop_inames = set() for linearization_item in linearization_items: From 70ec6a1bae5fbec9c1ef9b69415c24a079b8b385 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Sun, 26 Apr 2020 19:27:40 -0500 Subject: [PATCH 034/460] don't pass before/after insn_ids to get_isl_maps_for_LexSchedule(), instead get them from LexSchedule --- loopy/schedule/checker/__init__.py | 21 +++++---------------- test/test_linearization_checker.py | 24 ++++++++++++------------ 2 files changed, 17 insertions(+), 28 deletions(-) diff --git a/loopy/schedule/checker/__init__.py b/loopy/schedule/checker/__init__.py index 79fc2e0e8..68ca2e1a0 100644 --- a/loopy/schedule/checker/__init__.py +++ b/loopy/schedule/checker/__init__.py @@ -108,12 +108,7 @@ def get_schedule_for_statement_pair( # {{{ Get isl map pair for LexSchedule -def get_isl_maps_for_LexSchedule( - lex_sched, - knl, - insn_id_before, - insn_id_after, - ): +def get_isl_maps_for_LexSchedule(lex_sched, knl): """Create a pair of :class:`islpy.Map`s representing a :class:`loopy.schedule.checker.LexSchedule` as two mappings from statement instances to lexicographic time, one for @@ -127,12 +122,6 @@ def 
get_isl_maps_for_LexSchedule( :arg knl: A :class:`loopy.kernel.LoopKernel` containing the linearization items that will be used to create a schedule. - :arg insn_id_before: An instruction identifier that is unique within - a :class:`loopy.kernel.LoopKernel`. - - :arg insn_id_after: An instruction identifier that is unique within - a :class:`loopy.kernel.LoopKernel`. - :returns: A two-tuple containing two :class:`islpy.Map`s representing the schedule as two mappings from statement instances to lexicographic time, one for @@ -140,10 +129,10 @@ def get_isl_maps_for_LexSchedule( """ # {{{ Get iname domains - insn_before_inames = knl.id_to_insn[insn_id_before].within_inames - insn_after_inames = knl.id_to_insn[insn_id_after].within_inames - dom_before = knl.get_inames_domain(insn_before_inames) - dom_after = knl.get_inames_domain(insn_after_inames) + dom_before = knl.get_inames_domain( + knl.id_to_insn[lex_sched.stmt_instance_before.stmt.insn_id].within_inames) + dom_after = knl.get_inames_domain( + knl.id_to_insn[lex_sched.stmt_instance_after.stmt.insn_id].within_inames) # }}} # {{{ Get isl maps diff --git a/test/test_linearization_checker.py b/test/test_linearization_checker.py index c112b40ae..ed936a1ff 100644 --- a/test/test_linearization_checker.py +++ b/test/test_linearization_checker.py @@ -138,8 +138,8 @@ def test_lexschedule_and_islmap_creation(): # Get two isl maps representing the LexSchedule - isl_sched_map_before, isl_sched_map_after = \ - get_isl_maps_for_LexSchedule(lex_sched_ab, knl, "insn_a", "insn_b") + isl_sched_map_before, isl_sched_map_after = get_isl_maps_for_LexSchedule( + lex_sched_ab, knl) # Create expected maps, align, compare @@ -170,8 +170,8 @@ def test_lexschedule_and_islmap_creation(): # Get two isl maps representing the LexSchedule - isl_sched_map_before, isl_sched_map_after = \ - get_isl_maps_for_LexSchedule(lex_sched_ac, knl, "insn_a", "insn_c") + isl_sched_map_before, isl_sched_map_after = get_isl_maps_for_LexSchedule( + lex_sched_ac, 
knl) # Create expected maps, align, compare @@ -205,8 +205,8 @@ def perform_insn_ad_checks_with(sid_a, sid_d): # Get two isl maps representing the LexSchedule - isl_sched_map_before, isl_sched_map_after = \ - get_isl_maps_for_LexSchedule(lex_sched_ad, knl, "insn_a", "insn_d") + isl_sched_map_before, isl_sched_map_after = get_isl_maps_for_LexSchedule( + lex_sched_ad, knl) # Create expected maps, align, compare @@ -247,8 +247,8 @@ def perform_insn_bc_checks_with(sid_b, sid_c): # Get two isl maps representing the LexSchedule - isl_sched_map_before, isl_sched_map_after = \ - get_isl_maps_for_LexSchedule(lex_sched_bc, knl, "insn_b", "insn_c") + isl_sched_map_before, isl_sched_map_after = get_isl_maps_for_LexSchedule( + lex_sched_bc, knl) # Create expected maps, align, compare @@ -289,8 +289,8 @@ def perform_insn_bd_checks_with(sid_b, sid_d): # Get two isl maps representing the LexSchedule - isl_sched_map_before, isl_sched_map_after = \ - get_isl_maps_for_LexSchedule(lex_sched_bd, knl, "insn_b", "insn_d") + isl_sched_map_before, isl_sched_map_after = get_isl_maps_for_LexSchedule( + lex_sched_bd, knl) # Create expected maps, align, compare @@ -331,8 +331,8 @@ def perform_insn_cd_checks_with(sid_c, sid_d): # Get two isl maps representing the LexSchedule - isl_sched_map_before, isl_sched_map_after = \ - get_isl_maps_for_LexSchedule(lex_sched_cd, knl, "insn_c", "insn_d") + isl_sched_map_before, isl_sched_map_after = get_isl_maps_for_LexSchedule( + lex_sched_cd, knl) # Create expected maps, align, compare From af72169557b38b5bd26e69e80d657fb0d401aeac Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Mon, 27 Apr 2020 18:05:25 -0500 Subject: [PATCH 035/460] add TODO for future consideration of generalizing LexSchedule to allow more than two statements --- loopy/schedule/checker/schedule.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/loopy/schedule/checker/schedule.py b/loopy/schedule/checker/schedule.py index e868f5b19..5edeecaab 100644 --- 
a/loopy/schedule/checker/schedule.py +++ b/loopy/schedule/checker/schedule.py @@ -171,6 +171,8 @@ def __init__( # LexScheduleStatements self.stmt_instance_before = None self.stmt_instance_after = None + # TODO when/after dependencies are added, consider the possibility + # of removing the two-statements-per-LexSchedule limitation # make sure we don't have an iname name conflict # TODO use loopy's existing tool for ensuring unique var names From 2556e7590f6724b1a49c8370925dc9701aab6097 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Mon, 27 Apr 2020 18:16:23 -0500 Subject: [PATCH 036/460] remove extra args from get_isl_maps_for_LexSchedule() --- test/test_linearization_checker.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/test_linearization_checker.py b/test/test_linearization_checker.py index c6f8d56dc..f51b050ac 100644 --- a/test/test_linearization_checker.py +++ b/test/test_linearization_checker.py @@ -436,7 +436,7 @@ def check_sio_for_insn_pair( # Get two isl maps representing the LexSchedule isl_sched_map_before, isl_sched_map_after = get_isl_maps_for_LexSchedule( - lex_sched, knl, insn_id_before, insn_id_after) + lex_sched, knl) # get map representing lexicographic ordering sched_lex_order_map = lex_sched.get_lex_order_map_for_sched_space() From 98d744dda96fd835e38ba400d5dbc75f9f076a58 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Mon, 4 May 2020 23:25:37 -0500 Subject: [PATCH 037/460] remove no-longer-used arg from create_symbolic_isl_map_from_tuples() --- loopy/schedule/checker/schedule.py | 1 - loopy/schedule/checker/utils.py | 4 ---- 2 files changed, 5 deletions(-) diff --git a/loopy/schedule/checker/schedule.py b/loopy/schedule/checker/schedule.py index 5edeecaab..4138336b0 100644 --- a/loopy/schedule/checker/schedule.py +++ b/loopy/schedule/checker/schedule.py @@ -391,7 +391,6 @@ def _get_isl_map_for_stmt_inst( return create_symbolic_isl_map_from_tuples( tuple_pairs_with_domains=zip(tuple_pair, dom_to_intersect), 
space=sched_space, - statement_var_name=self.statement_var_name, ) map_before = _get_isl_map_for_stmt_inst( diff --git a/loopy/schedule/checker/utils.py b/loopy/schedule/checker/utils.py index b6a5487b0..0728e9686 100644 --- a/loopy/schedule/checker/utils.py +++ b/loopy/schedule/checker/utils.py @@ -162,7 +162,6 @@ def list_var_names_in_isl_sets( def create_symbolic_isl_map_from_tuples( tuple_pairs_with_domains, space, - statement_var_name, ): """Return an :class:`islpy.Map` constructed using the provided space, mapping input->output tuples provided in `tuple_pairs_with_domains`, @@ -177,9 +176,6 @@ def create_symbolic_isl_map_from_tuples( :arg space: A :class:`islpy.Space` to be used to create the map. - :arg statement_var_name: A :class:`str` specifying the name of the - isl variable used to represent the unique :class:`int` statement id. - :returns: A :class:`islpy.Map` constructed using the provided space as follows. For each `((tup_in, tup_out), domain)` in `tuple_pairs_with_domains`, map From ef79ff1deb95192dc2322b3b86aec3a3e1669f5a Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Tue, 5 May 2020 11:21:20 -0500 Subject: [PATCH 038/460] make var names begin wtih and remove variable/check --- loopy/schedule/checker/__init__.py | 11 --------- loopy/schedule/checker/schedule.py | 14 ++---------- test/test_linearization_checker.py | 36 ++++++++++++++++++++---------- 3 files changed, 26 insertions(+), 35 deletions(-) diff --git a/loopy/schedule/checker/__init__.py b/loopy/schedule/checker/__init__.py index 68ca2e1a0..3215201ae 100644 --- a/loopy/schedule/checker/__init__.py +++ b/loopy/schedule/checker/__init__.py @@ -28,7 +28,6 @@ def get_schedule_for_statement_pair( linearization_items, insn_id_before, insn_id_after, - prohibited_var_names=set(), ): """Create a :class:`loopy.schedule.checker.schedule.LexSchedule` representing the order of two statements as a mapping from @@ -51,10 +50,6 @@ def get_schedule_for_statement_pair( :arg insn_id_after: An instruction 
identifier that is unique within a :class:`loopy.kernel.LoopKernel`. - :arg prohibited_var_names: A set of :class:`str` representing - variable names that should not be used when creating names for - dimensions in a :class:`loopy.schedule.checker.LexSchedule`. - :returns: A :class:`loopy.schedule.checker.schedule.LexSchedule` representing the order of two statements as a mapping from :class:`loopy.schedule.checker.LexScheduleStatementInstance` @@ -66,11 +61,6 @@ def get_schedule_for_statement_pair( preproc_knl = preprocess_kernel(knl) # }}} - # {{{ By default, don't create LexSchedule variables matching existing inames - if not prohibited_var_names: - prohibited_var_names = preproc_knl.all_inames() - # }}} - # {{{ Find any EnterLoop inames that are tagged as concurrent # so that LexSchedule knows to ignore them # (In the future, this shouldn't be necessary because there @@ -98,7 +88,6 @@ def get_schedule_for_statement_pair( linearization_items, insn_id_before, insn_id_after, - prohibited_var_names=prohibited_var_names, loops_to_ignore=conc_loop_inames, ) # }}} diff --git a/loopy/schedule/checker/schedule.py b/loopy/schedule/checker/schedule.py index 4138336b0..49c9e0a01 100644 --- a/loopy/schedule/checker/schedule.py +++ b/loopy/schedule/checker/schedule.py @@ -141,15 +141,14 @@ class LexSchedule(object): """ - statement_var_name = "statement" - lex_var_prefix = "l" + statement_var_name = "_lp_statement" + lex_var_prefix = "_lp_l" def __init__( self, linearization_items_ordered, before_insn_id, after_insn_id, - prohibited_var_names=[], loops_to_ignore=set(), ): """ @@ -162,10 +161,6 @@ def __init__( :arg after_insn_id: A :class:`str` instruction id specifying the depender in this pair of instructions. - :arg prohibited_var_names: A list of :class:`str` variable names - that may not be used as the statement variable name (e.g., - because they are already being used as inames). 
- """ # LexScheduleStatements @@ -174,11 +169,6 @@ def __init__( # TODO when/after dependencies are added, consider the possibility # of removing the two-statements-per-LexSchedule limitation - # make sure we don't have an iname name conflict - # TODO use loopy's existing tool for ensuring unique var names - assert not any( - iname == self.statement_var_name for iname in prohibited_var_names) - from loopy.schedule import (EnterLoop, LeaveLoop, Barrier, RunInstruction) # go through linearization_items_ordered and generate self.lex_schedule diff --git a/test/test_linearization_checker.py b/test/test_linearization_checker.py index ed936a1ff..39a73718f 100644 --- a/test/test_linearization_checker.py +++ b/test/test_linearization_checker.py @@ -145,7 +145,8 @@ def test_lexschedule_and_islmap_creation(): isl_sched_map_before_expected = isl.Map( "[pi, pk] -> { " - "[statement = 0, i, k] -> [l0 = 0, l1 = i, l2 = 0, l3 = k, l4 = 0] : " + "[_lp_statement=0, i, k] -> " + "[_lp_l0=0, _lp_l1=i, _lp_l2=0, _lp_l3=k, _lp_l4=0] : " "0 <= i < pi and 0 <= k < pk }" ) isl_sched_map_before_expected = align_isl_maps_by_var_names( @@ -153,7 +154,8 @@ def test_lexschedule_and_islmap_creation(): isl_sched_map_after_expected = isl.Map( "[pi, pj] -> { " - "[statement = 1, i, j] -> [l0 = 0, l1 = i, l2 = 1, l3 = j, l4 = 0] : " + "[_lp_statement=1, i, j] -> " + "[_lp_l0=0, _lp_l1=i, _lp_l2=1, _lp_l3=j, _lp_l4=0] : " "0 <= i < pi and 0 <= j < pj }" ) isl_sched_map_after_expected = align_isl_maps_by_var_names( @@ -177,7 +179,8 @@ def test_lexschedule_and_islmap_creation(): isl_sched_map_before_expected = isl.Map( "[pi, pk] -> { " - "[statement = 0, i, k] -> [l0 = 0, l1 = i, l2 = 0, l3 = k, l4 = 0] : " + "[_lp_statement=0, i, k] -> " + "[_lp_l0=0, _lp_l1=i, _lp_l2=0, _lp_l3=k, _lp_l4=0] : " "0 <= i < pi and 0 <= k < pk }" ) isl_sched_map_before_expected = align_isl_maps_by_var_names( @@ -185,7 +188,8 @@ def test_lexschedule_and_islmap_creation(): isl_sched_map_after_expected = isl.Map( "[pi, pj] 
-> { " - "[statement = 1, i, j] -> [l0 = 0, l1 = i, l2 = 1, l3 = j, l4 = 0] : " + "[_lp_statement=1, i, j] -> " + "[_lp_l0=0, _lp_l1=i, _lp_l2=1, _lp_l3=j, _lp_l4=0] : " "0 <= i < pi and 0 <= j < pj }" ) isl_sched_map_after_expected = align_isl_maps_by_var_names( @@ -212,7 +216,8 @@ def perform_insn_ad_checks_with(sid_a, sid_d): isl_sched_map_before_expected = isl.Map( "[pi, pk] -> { " - "[statement = %d, i, k] -> [l0 = %d, l1 = i, l2 = 0, l3 = k, l4 = 0] : " + "[_lp_statement=%d, i, k] -> " + "[_lp_l0=%d, _lp_l1=i, _lp_l2=0, _lp_l3=k, _lp_l4=0] : " "0 <= i < pi and 0 <= k < pk }" % (sid_a, sid_a) ) @@ -221,7 +226,8 @@ def perform_insn_ad_checks_with(sid_a, sid_d): isl_sched_map_after_expected = isl.Map( "[pt] -> { " - "[statement = %d, t] -> [l0 = %d, l1 = t, l2 = 0, l3 = 0, l4 = 0] : " + "[_lp_statement=%d, t] -> " + "[_lp_l0=%d, _lp_l1=t, _lp_l2=0, _lp_l3=0, _lp_l4=0] : " "0 <= t < pt }" % (sid_d, sid_d) ) @@ -254,7 +260,8 @@ def perform_insn_bc_checks_with(sid_b, sid_c): isl_sched_map_before_expected = isl.Map( "[pi, pj] -> { " - "[statement = %d, i, j] -> [l0 = 0, l1 = i, l2 = 0, l3 = j, l4 = %d] : " + "[_lp_statement=%d, i, j] -> " + "[_lp_l0=0, _lp_l1=i, _lp_l2=0, _lp_l3=j, _lp_l4=%d] : " "0 <= i < pi and 0 <= j < pj }" % (sid_b, sid_b) ) @@ -263,7 +270,8 @@ def perform_insn_bc_checks_with(sid_b, sid_c): isl_sched_map_after_expected = isl.Map( "[pi, pj] -> { " - "[statement = %d, i, j] -> [l0 = 0, l1 = i, l2 = 0, l3 = j, l4 = %d] : " + "[_lp_statement=%d, i, j] -> " + "[_lp_l0=0, _lp_l1=i, _lp_l2=0, _lp_l3=j, _lp_l4=%d] : " "0 <= i < pi and 0 <= j < pj }" % (sid_c, sid_c) ) @@ -296,7 +304,8 @@ def perform_insn_bd_checks_with(sid_b, sid_d): isl_sched_map_before_expected = isl.Map( "[pi, pj] -> { " - "[statement = %d, i, j] -> [l0 = %d, l1 = i, l2 = 0, l3 = j, l4 = 0] : " + "[_lp_statement=%d, i, j] -> " + "[_lp_l0=%d, _lp_l1=i, _lp_l2=0, _lp_l3=j, _lp_l4=0] : " "0 <= i < pi and 0 <= j < pj }" % (sid_b, sid_b) ) @@ -305,7 +314,8 @@ def 
perform_insn_bd_checks_with(sid_b, sid_d): isl_sched_map_after_expected = isl.Map( "[pt] -> { " - "[statement = %d, t] -> [l0 = %d, l1 = t, l2 = 0, l3 = 0, l4 = 0] : " + "[_lp_statement=%d, t] -> " + "[_lp_l0=%d, _lp_l1=t, _lp_l2=0, _lp_l3=0, _lp_l4=0] : " "0 <= t < pt }" % (sid_d, sid_d) ) @@ -338,7 +348,8 @@ def perform_insn_cd_checks_with(sid_c, sid_d): isl_sched_map_before_expected = isl.Map( "[pi, pj] -> { " - "[statement = %d, i, j] -> [l0 = %d, l1 = i, l2 = 0, l3 = j, l4 = 0] : " + "[_lp_statement=%d, i, j] -> " + "[_lp_l0=%d, _lp_l1=i, _lp_l2=0, _lp_l3=j, _lp_l4=0] : " "0 <= i < pi and 0 <= j < pj }" % (sid_c, sid_c) ) @@ -347,7 +358,8 @@ def perform_insn_cd_checks_with(sid_c, sid_d): isl_sched_map_after_expected = isl.Map( "[pt] -> { " - "[statement = %d, t] -> [l0 = %d, l1 = t, l2 = 0, l3 = 0, l4 = 0] : " + "[_lp_statement=%d, t] -> " + "[_lp_l0=%d, _lp_l1=t, _lp_l2=0, _lp_l3=0, _lp_l4=0] : " "0 <= t < pt }" % (sid_d, sid_d) ) From ec11b2f65f4825f2e0526e1418d8a8e7b0ad1355 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Tue, 5 May 2020 12:40:02 -0500 Subject: [PATCH 039/460] change _lp previx to _lp_sched (add sub-prefix) --- loopy/schedule/checker/schedule.py | 4 +- test/test_linearization_checker.py | 60 ++++++++++++++++++------------ 2 files changed, 38 insertions(+), 26 deletions(-) diff --git a/loopy/schedule/checker/schedule.py b/loopy/schedule/checker/schedule.py index 49c9e0a01..6cabaf1be 100644 --- a/loopy/schedule/checker/schedule.py +++ b/loopy/schedule/checker/schedule.py @@ -141,8 +141,8 @@ class LexSchedule(object): """ - statement_var_name = "_lp_statement" - lex_var_prefix = "_lp_l" + statement_var_name = "_lp_sched_statement" + lex_var_prefix = "_lp_sched_l" def __init__( self, diff --git a/test/test_linearization_checker.py b/test/test_linearization_checker.py index 39a73718f..0dfb2fc90 100644 --- a/test/test_linearization_checker.py +++ b/test/test_linearization_checker.py @@ -145,8 +145,9 @@ def test_lexschedule_and_islmap_creation(): 
isl_sched_map_before_expected = isl.Map( "[pi, pk] -> { " - "[_lp_statement=0, i, k] -> " - "[_lp_l0=0, _lp_l1=i, _lp_l2=0, _lp_l3=k, _lp_l4=0] : " + "[_lp_sched_statement=0, i, k] -> " + "[_lp_sched_l0=0, _lp_sched_l1=i, _lp_sched_l2=0, _lp_sched_l3=k, " + "_lp_sched_l4=0] : " "0 <= i < pi and 0 <= k < pk }" ) isl_sched_map_before_expected = align_isl_maps_by_var_names( @@ -154,8 +155,9 @@ def test_lexschedule_and_islmap_creation(): isl_sched_map_after_expected = isl.Map( "[pi, pj] -> { " - "[_lp_statement=1, i, j] -> " - "[_lp_l0=0, _lp_l1=i, _lp_l2=1, _lp_l3=j, _lp_l4=0] : " + "[_lp_sched_statement=1, i, j] -> " + "[_lp_sched_l0=0, _lp_sched_l1=i, _lp_sched_l2=1, _lp_sched_l3=j, " + "_lp_sched_l4=0] : " "0 <= i < pi and 0 <= j < pj }" ) isl_sched_map_after_expected = align_isl_maps_by_var_names( @@ -179,8 +181,9 @@ def test_lexschedule_and_islmap_creation(): isl_sched_map_before_expected = isl.Map( "[pi, pk] -> { " - "[_lp_statement=0, i, k] -> " - "[_lp_l0=0, _lp_l1=i, _lp_l2=0, _lp_l3=k, _lp_l4=0] : " + "[_lp_sched_statement=0, i, k] -> " + "[_lp_sched_l0=0, _lp_sched_l1=i, _lp_sched_l2=0, _lp_sched_l3=k, " + "_lp_sched_l4=0] : " "0 <= i < pi and 0 <= k < pk }" ) isl_sched_map_before_expected = align_isl_maps_by_var_names( @@ -188,8 +191,9 @@ def test_lexschedule_and_islmap_creation(): isl_sched_map_after_expected = isl.Map( "[pi, pj] -> { " - "[_lp_statement=1, i, j] -> " - "[_lp_l0=0, _lp_l1=i, _lp_l2=1, _lp_l3=j, _lp_l4=0] : " + "[_lp_sched_statement=1, i, j] -> " + "[_lp_sched_l0=0, _lp_sched_l1=i, _lp_sched_l2=1, _lp_sched_l3=j, " + "_lp_sched_l4=0] : " "0 <= i < pi and 0 <= j < pj }" ) isl_sched_map_after_expected = align_isl_maps_by_var_names( @@ -216,8 +220,9 @@ def perform_insn_ad_checks_with(sid_a, sid_d): isl_sched_map_before_expected = isl.Map( "[pi, pk] -> { " - "[_lp_statement=%d, i, k] -> " - "[_lp_l0=%d, _lp_l1=i, _lp_l2=0, _lp_l3=k, _lp_l4=0] : " + "[_lp_sched_statement=%d, i, k] -> " + "[_lp_sched_l0=%d, _lp_sched_l1=i, _lp_sched_l2=0, 
_lp_sched_l3=k, " + "_lp_sched_l4=0] : " "0 <= i < pi and 0 <= k < pk }" % (sid_a, sid_a) ) @@ -226,8 +231,9 @@ def perform_insn_ad_checks_with(sid_a, sid_d): isl_sched_map_after_expected = isl.Map( "[pt] -> { " - "[_lp_statement=%d, t] -> " - "[_lp_l0=%d, _lp_l1=t, _lp_l2=0, _lp_l3=0, _lp_l4=0] : " + "[_lp_sched_statement=%d, t] -> " + "[_lp_sched_l0=%d, _lp_sched_l1=t, _lp_sched_l2=0, _lp_sched_l3=0, " + "_lp_sched_l4=0] : " "0 <= t < pt }" % (sid_d, sid_d) ) @@ -260,8 +266,9 @@ def perform_insn_bc_checks_with(sid_b, sid_c): isl_sched_map_before_expected = isl.Map( "[pi, pj] -> { " - "[_lp_statement=%d, i, j] -> " - "[_lp_l0=0, _lp_l1=i, _lp_l2=0, _lp_l3=j, _lp_l4=%d] : " + "[_lp_sched_statement=%d, i, j] -> " + "[_lp_sched_l0=0, _lp_sched_l1=i, _lp_sched_l2=0, _lp_sched_l3=j, " + "_lp_sched_l4=%d] : " "0 <= i < pi and 0 <= j < pj }" % (sid_b, sid_b) ) @@ -270,8 +277,9 @@ def perform_insn_bc_checks_with(sid_b, sid_c): isl_sched_map_after_expected = isl.Map( "[pi, pj] -> { " - "[_lp_statement=%d, i, j] -> " - "[_lp_l0=0, _lp_l1=i, _lp_l2=0, _lp_l3=j, _lp_l4=%d] : " + "[_lp_sched_statement=%d, i, j] -> " + "[_lp_sched_l0=0, _lp_sched_l1=i, _lp_sched_l2=0, _lp_sched_l3=j, " + "_lp_sched_l4=%d] : " "0 <= i < pi and 0 <= j < pj }" % (sid_c, sid_c) ) @@ -304,8 +312,9 @@ def perform_insn_bd_checks_with(sid_b, sid_d): isl_sched_map_before_expected = isl.Map( "[pi, pj] -> { " - "[_lp_statement=%d, i, j] -> " - "[_lp_l0=%d, _lp_l1=i, _lp_l2=0, _lp_l3=j, _lp_l4=0] : " + "[_lp_sched_statement=%d, i, j] -> " + "[_lp_sched_l0=%d, _lp_sched_l1=i, _lp_sched_l2=0, _lp_sched_l3=j, " + "_lp_sched_l4=0] : " "0 <= i < pi and 0 <= j < pj }" % (sid_b, sid_b) ) @@ -314,8 +323,9 @@ def perform_insn_bd_checks_with(sid_b, sid_d): isl_sched_map_after_expected = isl.Map( "[pt] -> { " - "[_lp_statement=%d, t] -> " - "[_lp_l0=%d, _lp_l1=t, _lp_l2=0, _lp_l3=0, _lp_l4=0] : " + "[_lp_sched_statement=%d, t] -> " + "[_lp_sched_l0=%d, _lp_sched_l1=t, _lp_sched_l2=0, _lp_sched_l3=0, " + 
"_lp_sched_l4=0] : " "0 <= t < pt }" % (sid_d, sid_d) ) @@ -348,8 +358,9 @@ def perform_insn_cd_checks_with(sid_c, sid_d): isl_sched_map_before_expected = isl.Map( "[pi, pj] -> { " - "[_lp_statement=%d, i, j] -> " - "[_lp_l0=%d, _lp_l1=i, _lp_l2=0, _lp_l3=j, _lp_l4=0] : " + "[_lp_sched_statement=%d, i, j] -> " + "[_lp_sched_l0=%d, _lp_sched_l1=i, _lp_sched_l2=0, _lp_sched_l3=j, " + "_lp_sched_l4=0] : " "0 <= i < pi and 0 <= j < pj }" % (sid_c, sid_c) ) @@ -358,8 +369,9 @@ def perform_insn_cd_checks_with(sid_c, sid_d): isl_sched_map_after_expected = isl.Map( "[pt] -> { " - "[_lp_statement=%d, t] -> " - "[_lp_l0=%d, _lp_l1=t, _lp_l2=0, _lp_l3=0, _lp_l4=0] : " + "[_lp_sched_statement=%d, t] -> " + "[_lp_sched_l0=%d, _lp_sched_l1=t, _lp_sched_l2=0, _lp_sched_l3=0, " + "_lp_sched_l4=0] : " "0 <= t < pt }" % (sid_d, sid_d) ) From f38f3027c1b575c6cbce1849b80a37292accbb85 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Tue, 12 May 2020 00:47:46 -0500 Subject: [PATCH 040/460] add new reserved prefix to map vars --- test/test_linearization_checker.py | 55 +++++++++++++++++++----------- 1 file changed, 36 insertions(+), 19 deletions(-) diff --git a/test/test_linearization_checker.py b/test/test_linearization_checker.py index 9ce2f981e..1e5457b94 100644 --- a/test/test_linearization_checker.py +++ b/test/test_linearization_checker.py @@ -485,23 +485,40 @@ def check_sio_for_insn_pair( assert sio_aligned == expected_sio - expected_lex_order_map = isl.Map( - "{ " - "[l0, l1, l2, l3, l4] -> [l0_, l1_, l2_, l3_, l4_]: l0_ > l0; " - "[l0, l1, l2, l3, l4] -> [l0_= l0, l1_, l2_, l3_, l4_]: l1_ > l1; " - "[l0, l1, l2, l3, l4] -> [l0_= l0, l1_= l1, l2_, l3_, l4_]: l2_ > l2; " - "[l0, l1, l2, l3, l4] -> [l0_= l0, l1_= l1, l2_= l2, l3_, l4_]: l3_ > l3; " - "[l0, l1, l2, l3, l4] -> [l0_= l0, l1_= l1, l2_= l2, l3_= l3, l4_]: l4_ > l4" - "}" - ) + expected_lex_order_map = isl.Map("{ " + "[_lp_sched_l0, _lp_sched_l1, _lp_sched_l2, _lp_sched_l3, _lp_sched_l4] -> " + "[_lp_sched_l0_, 
_lp_sched_l1_, _lp_sched_l2_, _lp_sched_l3_, _lp_sched_l4_]" + ":" + "(" + "_lp_sched_l0_ > _lp_sched_l0 " + ") or (" + "_lp_sched_l0_= _lp_sched_l0 and " + "_lp_sched_l1_ > _lp_sched_l1 " + ") or (" + "_lp_sched_l0_= _lp_sched_l0 and " + "_lp_sched_l1_= _lp_sched_l1 and " + "_lp_sched_l2_ > _lp_sched_l2 " + ") or (" + "_lp_sched_l0_= _lp_sched_l0 and " + "_lp_sched_l1_= _lp_sched_l1 and " + "_lp_sched_l2_= _lp_sched_l2 and " + "_lp_sched_l3_ > _lp_sched_l3 " + ") or (" + "_lp_sched_l0_= _lp_sched_l0 and " + "_lp_sched_l1_= _lp_sched_l1 and " + "_lp_sched_l2_= _lp_sched_l2 and " + "_lp_sched_l3_= _lp_sched_l3 and " + "_lp_sched_l4_ > _lp_sched_l4" + ")" + "}") # Relationship between insn_a and insn_b --------------------------------------- expected_sio = isl.Map( "[pi, pj, pk] -> { " - "[statement' = 0, i', k'] -> [statement = 1, i, j] : " + "[_lp_sched_statement'=0, i', k'] -> [_lp_sched_statement=1, i, j]:" "0 <= i' < pi and 0 <= k' < pk and 0 <= j < pj and 0 <= i < pi and i > i'; " - "[statement' = 0, i', k'] -> [statement = 1, i = i', j] : " + "[_lp_sched_statement'=0, i', k'] -> [_lp_sched_statement=1, i=i', j]:" "0 <= i' < pi and 0 <= k' < pk and 0 <= j < pj " "}" ) @@ -516,9 +533,9 @@ def check_sio_for_insn_pair( expected_sio = isl.Map( "[pi, pj, pk] -> { " - "[statement' = 0, i', k'] -> [statement = 1, i, j] : " + "[_lp_sched_statement'=0, i', k'] -> [_lp_sched_statement=1, i, j]:" "0 <= i' < pi and 0 <= k' < pk and 0 <= j < pj and 0 <= i < pi and i > i'; " - "[statement' = 0, i', k'] -> [statement = 1, i = i', j] : " + "[_lp_sched_statement'=0, i', k'] -> [_lp_sched_statement=1, i=i', j]:" "0 <= i' < pi and 0 <= k' < pk and 0 <= j < pj " "}" ) @@ -533,7 +550,7 @@ def check_sio_for_insn_pair( expected_sio = isl.Map( "[pt, pi, pk] -> { " - "[statement' = 0, i', k'] -> [statement = 1, t] : " + "[_lp_sched_statement'=0, i', k'] -> [_lp_sched_statement=1, t]:" "0 <= i' < pi and 0 <= k' < pk and 0 <= t < pt " "}" ) @@ -548,11 +565,11 @@ def 
check_sio_for_insn_pair( expected_sio = isl.Map( "[pi, pj] -> { " - "[statement' = 0, i', j'] -> [statement = 1, i, j] : " + "[_lp_sched_statement'=0, i', j'] -> [_lp_sched_statement=1, i, j]:" "0 <= i' < pi and 0 <= j' < pj and i > i' and 0 <= i < pi and 0 <= j < pj; " - "[statement' = 0, i', j'] -> [statement = 1, i = i', j] : " + "[_lp_sched_statement'=0, i', j'] -> [_lp_sched_statement=1, i=i', j]:" "0 <= i' < pi and 0 <= j' < pj and j > j' and 0 <= j < pj; " - "[statement' = 0, i', j'] -> [statement = 1, i = i', j = j'] : " + "[_lp_sched_statement'=0, i', j'] -> [_lp_sched_statement=1, i=i', j=j']:" "0 <= i' < pi and 0 <= j' < pj " "}" ) @@ -567,7 +584,7 @@ def check_sio_for_insn_pair( expected_sio = isl.Map( "[pt, pi, pj] -> { " - "[statement' = 0, i', j'] -> [statement = 1, t] : " + "[_lp_sched_statement'=0, i', j'] -> [_lp_sched_statement=1, t]:" "0 <= i' < pi and 0 <= j' < pj and 0 <= t < pt " "}" ) @@ -582,7 +599,7 @@ def check_sio_for_insn_pair( expected_sio = isl.Map( "[pt, pi, pj] -> { " - "[statement' = 0, i', j'] -> [statement = 1, t] : " + "[_lp_sched_statement'=0, i', j'] -> [_lp_sched_statement=1, t]:" "0 <= i' < pi and 0 <= j' < pj and 0 <= t < pt " "}" ) From d4506a0ef3d0f8bf3adf3efbe231f4be6d1cbc09 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Tue, 12 May 2020 01:08:24 -0500 Subject: [PATCH 041/460] =?UTF-8?q?use=20composition=20symbol=20=E2=97=A6?= =?UTF-8?q?=20in=20docstring=20for=20get=5Fstatement=5Fordering=5Fmap?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- loopy/schedule/checker/lexicographic_order_map.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/loopy/schedule/checker/lexicographic_order_map.py b/loopy/schedule/checker/lexicographic_order_map.py index ddc320ed9..f42e8e610 100644 --- a/loopy/schedule/checker/lexicographic_order_map.py +++ b/loopy/schedule/checker/lexicographic_order_map.py @@ -46,7 +46,7 @@ def get_statement_ordering_map( :returns: An 
:class:`islpy.Map` representing the lex schedule as a mapping from each statement instance to all statement instances - occuring later. I.e., we compose B -> L -> A^-1, where B + occuring later. I.e., we compose B ◦ L ◦ A^-1, where B is sched_map_before, A is sched_map_after, and L is the lexicographic ordering map. From 1568d79dd0d36a33e77efb6ad94d997e6fa2e217 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Tue, 12 May 2020 01:12:18 -0500 Subject: [PATCH 042/460] in docstring for get_statement_ordering_map(), clarify that we are composing relations --- loopy/schedule/checker/lexicographic_order_map.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/loopy/schedule/checker/lexicographic_order_map.py b/loopy/schedule/checker/lexicographic_order_map.py index f42e8e610..ce8808119 100644 --- a/loopy/schedule/checker/lexicographic_order_map.py +++ b/loopy/schedule/checker/lexicographic_order_map.py @@ -46,9 +46,9 @@ def get_statement_ordering_map( :returns: An :class:`islpy.Map` representing the lex schedule as a mapping from each statement instance to all statement instances - occuring later. I.e., we compose B ◦ L ◦ A^-1, where B - is sched_map_before, A is sched_map_after, and L is the - lexicographic ordering map. + occuring later. I.e., we compose relations B, L, and A as + B ◦ L ◦ A^-1, where B is sched_map_before, A is sched_map_after, + and L is the lexicographic ordering map. 
""" From a2c007b2f6908d72ccbd1c125347ee1e0f5e1c7a Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Tue, 19 May 2020 00:04:56 -0500 Subject: [PATCH 043/460] try a slightlyl different function composition symbol (to address 'Non-ASCII character' syntax error) --- loopy/schedule/checker/lexicographic_order_map.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/loopy/schedule/checker/lexicographic_order_map.py b/loopy/schedule/checker/lexicographic_order_map.py index ce8808119..9807d293f 100644 --- a/loopy/schedule/checker/lexicographic_order_map.py +++ b/loopy/schedule/checker/lexicographic_order_map.py @@ -47,7 +47,7 @@ def get_statement_ordering_map( :returns: An :class:`islpy.Map` representing the lex schedule as a mapping from each statement instance to all statement instances occuring later. I.e., we compose relations B, L, and A as - B ◦ L ◦ A^-1, where B is sched_map_before, A is sched_map_after, + B ∘ L ∘ A^-1, where B is sched_map_before, A is sched_map_after, and L is the lexicographic ordering map. 
""" From 11f8edd708ada13db5f81aa6b2d87638978155ca Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Tue, 19 May 2020 00:11:31 -0500 Subject: [PATCH 044/460] add 'coding: utf-8' at top of file to allow composition character --- loopy/schedule/checker/lexicographic_order_map.py | 1 + 1 file changed, 1 insertion(+) diff --git a/loopy/schedule/checker/lexicographic_order_map.py b/loopy/schedule/checker/lexicographic_order_map.py index 9807d293f..5ce2bb4a5 100644 --- a/loopy/schedule/checker/lexicographic_order_map.py +++ b/loopy/schedule/checker/lexicographic_order_map.py @@ -1,3 +1,4 @@ +# coding: utf-8 __copyright__ = "Copyright (C) 2019 James Stevens" __license__ = """ From db5fefe4c803947855484b96ce3132a3dc0a4a45 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Tue, 19 May 2020 01:57:43 -0500 Subject: [PATCH 045/460] improve time complexity of get_lex_order_constraint() --- .../checker/lexicographic_order_map.py | 30 +++++++++++++++---- 1 file changed, 24 insertions(+), 6 deletions(-) diff --git a/loopy/schedule/checker/lexicographic_order_map.py b/loopy/schedule/checker/lexicographic_order_map.py index 5ce2bb4a5..d783bac76 100644 --- a/loopy/schedule/checker/lexicographic_order_map.py +++ b/loopy/schedule/checker/lexicographic_order_map.py @@ -92,14 +92,32 @@ def get_lex_order_constraint(islvars, before_names, after_names): """ + # Initialize constraint with i0' < i0 lex_order_constraint = islvars[before_names[0]].lt_set(islvars[after_names[0]]) + + # Initialize conjunction constraint with True. + # For each dim d, starting with d=1, this conjunction will have d equalities, + # e.g., (i0' = i0 and i1' = i1 and ... 
i(d-1)' = i(d-1)) + equality_constraint_conj = islvars[0].eq_set(islvars[0]) + for i in range(1, len(before_names)): - lex_order_constraint_conj = islvars[before_names[i]].lt_set( - islvars[after_names[i]]) - for j in range(i): - lex_order_constraint_conj = lex_order_constraint_conj & \ - islvars[before_names[j]].eq_set(islvars[after_names[j]]) - lex_order_constraint = lex_order_constraint | lex_order_constraint_conj + + # Add the next equality constraint to equality_constraint_conj + equality_constraint_conj = equality_constraint_conj & \ + islvars[before_names[i-1]].eq_set(islvars[after_names[i-1]]) + + # Create a conjunction constraint by combining a less-than + # constraint for this dim, e.g., (i1' < i1), with the current + # equality constraint conjunction. + # For each dim d, starting with d=1, this conjunction will have d equalities, + # and one inequality, + # e.g., (i0' = i0 and i1' = i1 and ... i(d-1)' = i(d-1) and id' < id) + full_conj_constraint = islvars[before_names[i]].lt_set( + islvars[after_names[i]]) & equality_constraint_conj + + # Union this new constraint with the current lex_order_constraint + lex_order_constraint = lex_order_constraint | full_conj_constraint + return lex_order_constraint From 97e90820c5c232b845bf5063bfe2a71bd3bee01b Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Tue, 19 May 2020 02:22:12 -0500 Subject: [PATCH 046/460] have create_lex_order_map() put apostrophes on 'before' vars for consistency with other logic --- .../checker/lexicographic_order_map.py | 6 +-- loopy/schedule/checker/schedule.py | 2 +- test/test_linearization_checker.py | 40 +++++++++++-------- 3 files changed, 27 insertions(+), 21 deletions(-) diff --git a/loopy/schedule/checker/lexicographic_order_map.py b/loopy/schedule/checker/lexicographic_order_map.py index d783bac76..17b6616ca 100644 --- a/loopy/schedule/checker/lexicographic_order_map.py +++ b/loopy/schedule/checker/lexicographic_order_map.py @@ -153,13 +153,13 @@ def create_lex_order_map( """ - if 
before_names is None: - before_names = ["i%s" % (i) for i in range(n_dims)] if after_names is None: + after_names = ["i%s" % (i) for i in range(n_dims)] + if before_names is None: from loopy.schedule.checker.utils import ( append_marker_to_strings, ) - after_names = append_marker_to_strings(before_names, marker="_") + before_names = append_marker_to_strings(after_names, marker="'") assert len(before_names) == len(after_names) == n_dims dim_type = isl.dim_type diff --git a/loopy/schedule/checker/schedule.py b/loopy/schedule/checker/schedule.py index ea0829199..a87723480 100644 --- a/loopy/schedule/checker/schedule.py +++ b/loopy/schedule/checker/schedule.py @@ -409,7 +409,7 @@ def get_lex_order_map_for_sched_space(self): ) n_dims = self.max_lex_dims() return create_lex_order_map( - n_dims, before_names=self.get_lex_var_names()) + n_dims, after_names=self.get_lex_var_names()) def __str__(self): diff --git a/test/test_linearization_checker.py b/test/test_linearization_checker.py index 1e5457b94..e57df9ac8 100644 --- a/test/test_linearization_checker.py +++ b/test/test_linearization_checker.py @@ -486,32 +486,38 @@ def check_sio_for_insn_pair( assert sio_aligned == expected_sio expected_lex_order_map = isl.Map("{ " - "[_lp_sched_l0, _lp_sched_l1, _lp_sched_l2, _lp_sched_l3, _lp_sched_l4] -> " - "[_lp_sched_l0_, _lp_sched_l1_, _lp_sched_l2_, _lp_sched_l3_, _lp_sched_l4_]" + "[_lp_sched_l0', _lp_sched_l1', _lp_sched_l2', _lp_sched_l3', _lp_sched_l4']" + " -> [_lp_sched_l0, _lp_sched_l1, _lp_sched_l2, _lp_sched_l3, _lp_sched_l4]" ":" "(" - "_lp_sched_l0_ > _lp_sched_l0 " + "_lp_sched_l0' < _lp_sched_l0 " ") or (" - "_lp_sched_l0_= _lp_sched_l0 and " - "_lp_sched_l1_ > _lp_sched_l1 " + "_lp_sched_l0'= _lp_sched_l0 and " + "_lp_sched_l1' < _lp_sched_l1 " ") or (" - "_lp_sched_l0_= _lp_sched_l0 and " - "_lp_sched_l1_= _lp_sched_l1 and " - "_lp_sched_l2_ > _lp_sched_l2 " + "_lp_sched_l0'= _lp_sched_l0 and " + "_lp_sched_l1'= _lp_sched_l1 and " + "_lp_sched_l2' < _lp_sched_l2 
" ") or (" - "_lp_sched_l0_= _lp_sched_l0 and " - "_lp_sched_l1_= _lp_sched_l1 and " - "_lp_sched_l2_= _lp_sched_l2 and " - "_lp_sched_l3_ > _lp_sched_l3 " + "_lp_sched_l0'= _lp_sched_l0 and " + "_lp_sched_l1'= _lp_sched_l1 and " + "_lp_sched_l2'= _lp_sched_l2 and " + "_lp_sched_l3' < _lp_sched_l3 " ") or (" - "_lp_sched_l0_= _lp_sched_l0 and " - "_lp_sched_l1_= _lp_sched_l1 and " - "_lp_sched_l2_= _lp_sched_l2 and " - "_lp_sched_l3_= _lp_sched_l3 and " - "_lp_sched_l4_ > _lp_sched_l4" + "_lp_sched_l0'= _lp_sched_l0 and " + "_lp_sched_l1'= _lp_sched_l1 and " + "_lp_sched_l2'= _lp_sched_l2 and " + "_lp_sched_l3'= _lp_sched_l3 and " + "_lp_sched_l4' < _lp_sched_l4" ")" "}") + # Isl ignores these apostrophes, but test would still pass since it ignores + # variable names when checking for equality. Even so, explicitly add apostrophes + # for sanity. + expected_lex_order_map = append_marker_to_isl_map_var_names( + expected_lex_order_map, isl.dim_type.in_, "'") + # Relationship between insn_a and insn_b --------------------------------------- expected_sio = isl.Map( From 767e821c7cc56331230c61e7d2193dbb6860394f Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Mon, 25 May 2020 02:22:26 -0500 Subject: [PATCH 047/460] in docstring for LexScheduleStatement, describe usage of int_id --- loopy/schedule/checker/schedule.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/loopy/schedule/checker/schedule.py b/loopy/schedule/checker/schedule.py index 6cabaf1be..bbea293a2 100644 --- a/loopy/schedule/checker/schedule.py +++ b/loopy/schedule/checker/schedule.py @@ -28,11 +28,17 @@ class LexScheduleStatement(object): .. attribute:: insn_id - A :class:`str` specifying the instruction id. + A :class:`str` specifying the :mod:`loopy` instruction id + for this statement. .. attribute:: int_id - A :class:`int` uniquely identifying the instruction. + A :class:`int` uniquely identifying the statement within a + :class:`LexSchedule`. 
A :class:`LexSchedule` describes a mapping + from points in a space of statement instances to points in a + lexicographic ordering. The `statement` dimension of a point + in the statement instance space representing an instance of this + statement is assigned this value (`int_id`). .. attribute:: within_inames From 9d52cc98da397f5ae0987cead564dc427e105459 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Mon, 25 May 2020 02:47:29 -0500 Subject: [PATCH 048/460] remove within_inames attribute from LexScheduleStatement() --- loopy/schedule/checker/schedule.py | 16 +--------------- 1 file changed, 1 insertion(+), 15 deletions(-) diff --git a/loopy/schedule/checker/schedule.py b/loopy/schedule/checker/schedule.py index bbea293a2..7f808abd5 100644 --- a/loopy/schedule/checker/schedule.py +++ b/loopy/schedule/checker/schedule.py @@ -40,28 +40,20 @@ class LexScheduleStatement(object): in the statement instance space representing an instance of this statement is assigned this value (`int_id`). - .. attribute:: within_inames - - A :class:`list` of :class:`str` inames identifying the loops within - which this statement will be executed. 
- """ def __init__( self, insn_id, # loopy insn id int_id=None, # sid int (statement id within LexSchedule) - within_inames=None, # [string, ] ): self.insn_id = insn_id # string self.int_id = int_id - self.within_inames = within_inames def __eq__(self, other): return ( self.insn_id == other.insn_id and self.int_id == other.int_id - and self.within_inames == other.within_inames ) def update_persistent_hash(self, key_hash, key_builder): @@ -71,19 +63,13 @@ def update_persistent_hash(self, key_hash, key_builder): key_builder.rec(key_hash, self.insn_id) key_builder.rec(key_hash, self.int_id) - key_builder.rec(key_hash, self.within_inames) def __str__(self): if self.int_id is not None: int_id = ":%d" % (self.int_id) else: int_id = "" - if self.within_inames: - within_inames = " {%s}" % (",".join(self.within_inames)) - else: - within_inames = "" - return "%s%s%s" % ( - self.insn_id, int_id, within_inames) + return "%s%s" % (self.insn_id, int_id) class LexScheduleStatementInstance(object): From a19113153a84148545c08837c2055a774b5f3e75 Mon Sep 17 00:00:00 2001 From: James Stevens Date: Mon, 25 May 2020 10:19:25 +0200 Subject: [PATCH 049/460] Apply suggestion to loopy/schedule/checker/schedule.py --- loopy/schedule/checker/schedule.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/loopy/schedule/checker/schedule.py b/loopy/schedule/checker/schedule.py index 7f808abd5..ea3181b66 100644 --- a/loopy/schedule/checker/schedule.py +++ b/loopy/schedule/checker/schedule.py @@ -73,7 +73,7 @@ def __str__(self): class LexScheduleStatementInstance(object): - """A representation of a :mod:`loopy` statement instance. + """A representation of a statement instance. .. 
attribute:: stmt From bcef27aa953a07b5fcb49e1c92449bc041d22699 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Mon, 25 May 2020 21:12:31 -0500 Subject: [PATCH 050/460] rename {lex_pt->lex_points, pad_lex_pts_with_zeros->pad_lex_tuples_with_zeros, _pad_lex_pt_with_zeros->_pad_lex_tuple_with_zeros, next_insn_lex_pt->next_insn_lex_tuple}; lex_pt actually describes multiple points, not a single point --- loopy/schedule/checker/schedule.py | 70 +++++++++++++++--------------- test/test_linearization_checker.py | 56 ++++++++++++------------ 2 files changed, 63 insertions(+), 63 deletions(-) diff --git a/loopy/schedule/checker/schedule.py b/loopy/schedule/checker/schedule.py index ea3181b66..052a47afe 100644 --- a/loopy/schedule/checker/schedule.py +++ b/loopy/schedule/checker/schedule.py @@ -79,7 +79,7 @@ class LexScheduleStatementInstance(object): A :class:`LexScheduleStatement`. - .. attribute:: lex_pt + .. attribute:: lex_points A list of :class:`int` or as :class:`str` :mod:`loopy` inames representing a point or set of points in a lexicographic ordering. 
@@ -89,13 +89,13 @@ class LexScheduleStatementInstance(object): def __init__( self, stmt, # a LexScheduleStatement - lex_pt, # [string/int, ] + lex_points, # [string/int, ] ): self.stmt = stmt - self.lex_pt = lex_pt + self.lex_points = lex_points def __str__(self): - return "{%s, %s}" % (self.stmt, self.lex_pt) + return "{%s, %s}" % (self.stmt, self.lex_points) class LexSchedule(object): @@ -167,7 +167,7 @@ def __init__( # keep track of the next point in our lexicographic ordering # initially this as a 1-d point with value 0 - next_insn_lex_pt = [0] + next_insn_lex_tuple = [0] stmt_added_since_prev_block_at_tier = [False] next_sid = 0 for linearization_item in linearization_items_ordered: @@ -176,13 +176,13 @@ def __init__( if iname in loops_to_ignore: continue - # We could always increment next_insn_lex_pt[-1] here since this new - # section of code comes after the previous section (statements - # since last opened/closed loop), but if we have not added any - # statements within the previous section yet, we don't have to - # (effectively ignoring that section of code). + # We could always increment next_insn_lex_tuple[-1] here since + # this new section of code comes after the previous section + # (statements since last opened/closed loop), but if we have + # not added any statements within the previous section yet, we + # don't have to (effectively ignoring that section of code). 
if stmt_added_since_prev_block_at_tier[-1]: - next_insn_lex_pt[-1] = next_insn_lex_pt[-1]+1 + next_insn_lex_tuple[-1] = next_insn_lex_tuple[-1]+1 stmt_added_since_prev_block_at_tier[-1] = False # upon entering a loop, we enter a new (deeper) tier, @@ -190,8 +190,8 @@ def __init__( # add second lex dim to enumerate code blocks within new loop, and # append a dim to stmt_added_since_prev_block_at_tier to represent # new tier - next_insn_lex_pt.append(iname) - next_insn_lex_pt.append(0) + next_insn_lex_tuple.append(iname) + next_insn_lex_tuple.append(0) stmt_added_since_prev_block_at_tier.append(False) elif isinstance(linearization_item, LeaveLoop): if linearization_item.iname in loops_to_ignore: @@ -200,17 +200,17 @@ def __init__( # pop lex dimension for enumerating code blocks within this loop, and # pop lex dimension for the loop variable, and # increment lex dim val enumerating items in current code block - next_insn_lex_pt.pop() - next_insn_lex_pt.pop() - - # We could always increment next_insn_lex_pt[-1] here since this new - # block of code comes after the previous block (all statements - # since last opened/closed loop), but if we have not added any - # statements within the previous section yet, we don't have to - # (effectively ignoring that section of code). + next_insn_lex_tuple.pop() + next_insn_lex_tuple.pop() + + # We could always increment next_insn_lex_tuple[-1] here since + # this new block of code comes after the previous block (all + # statements since last opened/closed loop), but if we have not + # added any statements within the previous section yet, we + # don't have to (effectively ignoring that section of code). 
stmt_added_since_prev_block_at_tier.pop() if stmt_added_since_prev_block_at_tier[-1]: - next_insn_lex_pt[-1] = next_insn_lex_pt[-1]+1 + next_insn_lex_tuple[-1] = next_insn_lex_tuple[-1]+1 stmt_added_since_prev_block_at_tier[-1] = False elif isinstance(linearization_item, (RunInstruction, Barrier)): from loopy.schedule.checker.utils import ( @@ -234,7 +234,7 @@ def __init__( insn_id=lp_insn_id, int_id=next_sid, # int representing insn ), - next_insn_lex_pt[:]) + next_insn_lex_tuple[:]) stmt_added = True if lp_insn_id == after_insn_id: @@ -244,7 +244,7 @@ def __init__( insn_id=lp_insn_id, int_id=next_sid, # int representing insn ), - next_insn_lex_pt[:]) + next_insn_lex_tuple[:]) stmt_added = True # Note: before/after may refer to same stmt, in which case @@ -252,7 +252,7 @@ def __init__( if stmt_added: # increment lex dim val enumerating items in current code block - next_insn_lex_pt[-1] = next_insn_lex_pt[-1] + 1 + next_insn_lex_tuple[-1] = next_insn_lex_tuple[-1] + 1 next_sid += 1 # all current (nested) blocks now contain a statement @@ -266,14 +266,14 @@ def __init__( # at this point, lex_schedule may contain lex points missing dimensions, # the values in these missing dims should be zero, so add them - self.pad_lex_pts_with_zeros() + self.pad_lex_tuples_with_zeros() def max_lex_dims(self): return max([ - len(self.stmt_instance_before.lex_pt), - len(self.stmt_instance_after.lex_pt)]) + len(self.stmt_instance_before.lex_points), + len(self.stmt_instance_after.lex_points)]) - def pad_lex_pts_with_zeros(self): + def pad_lex_tuples_with_zeros(self): """Find the maximum number of lexicographic dimensions represented in the lexicographic ordering, and if any :class:`LexScheduleStatement` maps to a point in lexicographic @@ -281,17 +281,17 @@ def pad_lex_pts_with_zeros(self): dimensions. 
""" - def _pad_lex_pt_with_zeros(stmt_inst, length): + def _pad_lex_tuple_with_zeros(stmt_inst, length): return LexScheduleStatementInstance( stmt_inst.stmt, - stmt_inst.lex_pt[:] + [0]*(length-len(stmt_inst.lex_pt)), + stmt_inst.lex_points[:] + [0]*(length-len(stmt_inst.lex_points)), ) max_lex_dim = self.max_lex_dims() - self.stmt_instance_before = _pad_lex_pt_with_zeros( + self.stmt_instance_before = _pad_lex_tuple_with_zeros( self.stmt_instance_before, max_lex_dim) - self.stmt_instance_after = _pad_lex_pt_with_zeros( + self.stmt_instance_after = _pad_lex_tuple_with_zeros( self.stmt_instance_after, max_lex_dim) def create_isl_maps( @@ -366,7 +366,7 @@ def _get_isl_map_for_stmt_inst( # Add all inames from domains to each map domain tuple. tuple_pair = [( (stmt_inst.stmt.int_id, ) + tuple(dom_inames_ordered), - stmt_inst.lex_pt + stmt_inst.lex_points )] # create isl map @@ -396,7 +396,7 @@ def stringify_sched_stmt_instance(stmt_inst): return "{\n[%s=%s,] -> %s;\n}" % ( self.statement_var_name, stmt_inst.stmt.int_id, - stmt_inst.lex_pt) + stmt_inst.lex_points) return "Before: %s\nAfter: %s" % ( stringify_sched_stmt_instance(self.stmt_instance_before), diff --git a/test/test_linearization_checker.py b/test/test_linearization_checker.py index 0dfb2fc90..aab9c8507 100644 --- a/test/test_linearization_checker.py +++ b/test/test_linearization_checker.py @@ -94,37 +94,37 @@ def test_lexschedule_and_islmap_creation(): linearization_items = knl.linearization # Create LexSchedule: mapping of {statement instance: lex point} - lex_sched_ab = get_schedule_for_statement_pair( + sched_ab = get_schedule_for_statement_pair( knl, linearization_items, "insn_a", "insn_b", ) - lex_sched_ac = get_schedule_for_statement_pair( + sched_ac = get_schedule_for_statement_pair( knl, linearization_items, "insn_a", "insn_c", ) - lex_sched_ad = get_schedule_for_statement_pair( + sched_ad = get_schedule_for_statement_pair( knl, linearization_items, "insn_a", "insn_d", ) - lex_sched_bc = 
get_schedule_for_statement_pair( + sched_bc = get_schedule_for_statement_pair( knl, linearization_items, "insn_b", "insn_c", ) - lex_sched_bd = get_schedule_for_statement_pair( + sched_bd = get_schedule_for_statement_pair( knl, linearization_items, "insn_b", "insn_d", ) - lex_sched_cd = get_schedule_for_statement_pair( + sched_cd = get_schedule_for_statement_pair( knl, linearization_items, "insn_c", @@ -133,13 +133,13 @@ def test_lexschedule_and_islmap_creation(): # Relationship between insn_a and insn_b --------------------------------------- - assert lex_sched_ab.stmt_instance_before.lex_pt == [0, 'i', 0, 'k', 0] - assert lex_sched_ab.stmt_instance_after.lex_pt == [0, 'i', 1, 'j', 0] + assert sched_ab.stmt_instance_before.lex_points == [0, 'i', 0, 'k', 0] + assert sched_ab.stmt_instance_after.lex_points == [0, 'i', 1, 'j', 0] # Get two isl maps representing the LexSchedule isl_sched_map_before, isl_sched_map_after = get_isl_maps_for_LexSchedule( - lex_sched_ab, knl) + sched_ab, knl) # Create expected maps, align, compare @@ -169,13 +169,13 @@ def test_lexschedule_and_islmap_creation(): # ------------------------------------------------------------------------------ # Relationship between insn_a and insn_c --------------------------------------- - assert lex_sched_ac.stmt_instance_before.lex_pt == [0, 'i', 0, 'k', 0] - assert lex_sched_ac.stmt_instance_after.lex_pt == [0, 'i', 1, 'j', 0] + assert sched_ac.stmt_instance_before.lex_points == [0, 'i', 0, 'k', 0] + assert sched_ac.stmt_instance_after.lex_points == [0, 'i', 1, 'j', 0] # Get two isl maps representing the LexSchedule isl_sched_map_before, isl_sched_map_after = get_isl_maps_for_LexSchedule( - lex_sched_ac, knl) + sched_ac, knl) # Create expected maps, align, compare @@ -208,13 +208,13 @@ def test_lexschedule_and_islmap_creation(): # insn_a and insn_d could have been linearized in either order # (i loop could be before or after t loop) def perform_insn_ad_checks_with(sid_a, sid_d): - assert 
lex_sched_ad.stmt_instance_before.lex_pt == [sid_a, 'i', 0, 'k', 0] - assert lex_sched_ad.stmt_instance_after.lex_pt == [sid_d, 't', 0, 0, 0] + assert sched_ad.stmt_instance_before.lex_points == [sid_a, 'i', 0, 'k', 0] + assert sched_ad.stmt_instance_after.lex_points == [sid_d, 't', 0, 0, 0] # Get two isl maps representing the LexSchedule isl_sched_map_before, isl_sched_map_after = get_isl_maps_for_LexSchedule( - lex_sched_ad, knl) + sched_ad, knl) # Create expected maps, align, compare @@ -243,7 +243,7 @@ def perform_insn_ad_checks_with(sid_a, sid_d): assert isl_sched_map_before == isl_sched_map_before_expected assert isl_sched_map_after == isl_sched_map_after_expected - if lex_sched_ad.stmt_instance_before.stmt.int_id == 0: + if sched_ad.stmt_instance_before.stmt.int_id == 0: perform_insn_ad_checks_with(0, 1) else: perform_insn_ad_checks_with(1, 0) @@ -254,13 +254,13 @@ def perform_insn_ad_checks_with(sid_a, sid_d): # insn_b and insn_c could have been linearized in either order # (i loop could be before or after t loop) def perform_insn_bc_checks_with(sid_b, sid_c): - assert lex_sched_bc.stmt_instance_before.lex_pt == [0, 'i', 0, 'j', sid_b] - assert lex_sched_bc.stmt_instance_after.lex_pt == [0, 'i', 0, 'j', sid_c] + assert sched_bc.stmt_instance_before.lex_points == [0, 'i', 0, 'j', sid_b] + assert sched_bc.stmt_instance_after.lex_points == [0, 'i', 0, 'j', sid_c] # Get two isl maps representing the LexSchedule isl_sched_map_before, isl_sched_map_after = get_isl_maps_for_LexSchedule( - lex_sched_bc, knl) + sched_bc, knl) # Create expected maps, align, compare @@ -289,7 +289,7 @@ def perform_insn_bc_checks_with(sid_b, sid_c): assert isl_sched_map_before == isl_sched_map_before_expected assert isl_sched_map_after == isl_sched_map_after_expected - if lex_sched_bc.stmt_instance_before.stmt.int_id == 0: + if sched_bc.stmt_instance_before.stmt.int_id == 0: perform_insn_bc_checks_with(0, 1) else: perform_insn_bc_checks_with(1, 0) @@ -300,13 +300,13 @@ def 
perform_insn_bc_checks_with(sid_b, sid_c): # insn_b and insn_d could have been linearized in either order # (i loop could be before or after t loop) def perform_insn_bd_checks_with(sid_b, sid_d): - assert lex_sched_bd.stmt_instance_before.lex_pt == [sid_b, 'i', 0, 'j', 0] - assert lex_sched_bd.stmt_instance_after.lex_pt == [sid_d, 't', 0, 0, 0] + assert sched_bd.stmt_instance_before.lex_points == [sid_b, 'i', 0, 'j', 0] + assert sched_bd.stmt_instance_after.lex_points == [sid_d, 't', 0, 0, 0] # Get two isl maps representing the LexSchedule isl_sched_map_before, isl_sched_map_after = get_isl_maps_for_LexSchedule( - lex_sched_bd, knl) + sched_bd, knl) # Create expected maps, align, compare @@ -335,7 +335,7 @@ def perform_insn_bd_checks_with(sid_b, sid_d): assert isl_sched_map_before == isl_sched_map_before_expected assert isl_sched_map_after == isl_sched_map_after_expected - if lex_sched_bd.stmt_instance_before.stmt.int_id == 0: + if sched_bd.stmt_instance_before.stmt.int_id == 0: perform_insn_bd_checks_with(0, 1) else: perform_insn_bd_checks_with(1, 0) @@ -346,13 +346,13 @@ def perform_insn_bd_checks_with(sid_b, sid_d): # insn_c and insn_d could have been linearized in either order # (i loop could be before or after t loop) def perform_insn_cd_checks_with(sid_c, sid_d): - assert lex_sched_cd.stmt_instance_before.lex_pt == [sid_c, 'i', 0, 'j', 0] - assert lex_sched_cd.stmt_instance_after.lex_pt == [sid_d, 't', 0, 0, 0] + assert sched_cd.stmt_instance_before.lex_points == [sid_c, 'i', 0, 'j', 0] + assert sched_cd.stmt_instance_after.lex_points == [sid_d, 't', 0, 0, 0] # Get two isl maps representing the LexSchedule isl_sched_map_before, isl_sched_map_after = get_isl_maps_for_LexSchedule( - lex_sched_cd, knl) + sched_cd, knl) # Create expected maps, align, compare @@ -381,7 +381,7 @@ def perform_insn_cd_checks_with(sid_c, sid_d): assert isl_sched_map_before == isl_sched_map_before_expected assert isl_sched_map_after == isl_sched_map_after_expected - if 
lex_sched_cd.stmt_instance_before.stmt.int_id == 0: + if sched_cd.stmt_instance_before.stmt.int_id == 0: perform_insn_cd_checks_with(0, 1) else: perform_insn_cd_checks_with(1, 0) From 7cc557eaeb4351cf23cf17b76c1648c3a4a0e9e1 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Mon, 25 May 2020 21:30:20 -0500 Subject: [PATCH 051/460] rename LexScheduleStatementInstance->LexScheduleStatementInstanceSet (will probably rename again) --- loopy/schedule/checker/__init__.py | 6 +++--- loopy/schedule/checker/schedule.py | 15 ++++++++------- 2 files changed, 11 insertions(+), 10 deletions(-) diff --git a/loopy/schedule/checker/__init__.py b/loopy/schedule/checker/__init__.py index 3215201ae..176b1399c 100644 --- a/loopy/schedule/checker/__init__.py +++ b/loopy/schedule/checker/__init__.py @@ -31,7 +31,7 @@ def get_schedule_for_statement_pair( ): """Create a :class:`loopy.schedule.checker.schedule.LexSchedule` representing the order of two statements as a mapping from - :class:`loopy.schedule.checker.LexScheduleStatementInstance` + :class:`loopy.schedule.checker.LexScheduleStatementInstanceSet` to lexicographic time. :arg knl: A :class:`loopy.kernel.LoopKernel` containing the @@ -52,7 +52,7 @@ def get_schedule_for_statement_pair( :returns: A :class:`loopy.schedule.checker.schedule.LexSchedule` representing the order of two statements as a mapping from - :class:`loopy.schedule.checker.LexScheduleStatementInstance` + :class:`loopy.schedule.checker.LexScheduleStatementInstanceSet` to lexicographic time. """ @@ -105,7 +105,7 @@ def get_isl_maps_for_LexSchedule(lex_sched, knl): :arg lex_sched: A :class:`loopy.schedule.checker.schedule.LexSchedule` representing the order of two statements as a mapping from - :class:`loopy.schedule.checker.LexScheduleStatementInstance` + :class:`loopy.schedule.checker.LexScheduleStatementInstanceSet` to lexicographic time. 
:arg knl: A :class:`loopy.kernel.LoopKernel` containing the diff --git a/loopy/schedule/checker/schedule.py b/loopy/schedule/checker/schedule.py index 052a47afe..f839f45aa 100644 --- a/loopy/schedule/checker/schedule.py +++ b/loopy/schedule/checker/schedule.py @@ -72,8 +72,9 @@ def __str__(self): return "%s%s" % (self.insn_id, int_id) -class LexScheduleStatementInstance(object): - """A representation of a statement instance. +class LexScheduleStatementInstanceSet(object): + """A representation of a set of instances of a + :class:`LexScheduleStatement`. .. attribute:: stmt @@ -106,7 +107,7 @@ class LexSchedule(object): .. attribute:: stmt_instance_before - A :class:`LexScheduleStatementInstance` describing the dependee + A :class:`LexScheduleStatementInstanceSet` describing the dependee statement's order relative to the depender statment by mapping a statement to a point or set of points in a lexicographic ordering. Points in lexicographic ordering are represented as @@ -114,7 +115,7 @@ class LexSchedule(object): .. attribute:: stmt_instance_after - A :class:`LexScheduleStatementInstance` describing the depender + A :class:`LexScheduleStatementInstanceSet` describing the depender statement's order relative to the dependee statment by mapping a statement to a point or set of points in a lexicographic ordering. 
Points in lexicographic ordering are represented as @@ -229,7 +230,7 @@ def __init__( if lp_insn_id == before_insn_id: # add before sched item - self.stmt_instance_before = LexScheduleStatementInstance( + self.stmt_instance_before = LexScheduleStatementInstanceSet( LexScheduleStatement( insn_id=lp_insn_id, int_id=next_sid, # int representing insn @@ -239,7 +240,7 @@ def __init__( if lp_insn_id == after_insn_id: # add after sched item - self.stmt_instance_after = LexScheduleStatementInstance( + self.stmt_instance_after = LexScheduleStatementInstanceSet( LexScheduleStatement( insn_id=lp_insn_id, int_id=next_sid, # int representing insn @@ -282,7 +283,7 @@ def pad_lex_tuples_with_zeros(self): """ def _pad_lex_tuple_with_zeros(stmt_inst, length): - return LexScheduleStatementInstance( + return LexScheduleStatementInstanceSet( stmt_inst.stmt, stmt_inst.lex_points[:] + [0]*(length-len(stmt_inst.lex_points)), ) From 2f6942632965b1406bce1b598ab7235dd2759bf7 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Mon, 25 May 2020 21:43:54 -0500 Subject: [PATCH 052/460] remove comments with redundant documentation --- loopy/schedule/checker/schedule.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/loopy/schedule/checker/schedule.py b/loopy/schedule/checker/schedule.py index f839f45aa..aac9b5744 100644 --- a/loopy/schedule/checker/schedule.py +++ b/loopy/schedule/checker/schedule.py @@ -44,10 +44,10 @@ class LexScheduleStatement(object): def __init__( self, - insn_id, # loopy insn id - int_id=None, # sid int (statement id within LexSchedule) + insn_id, + int_id=None, ): - self.insn_id = insn_id # string + self.insn_id = insn_id self.int_id = int_id def __eq__(self, other): @@ -89,8 +89,8 @@ class LexScheduleStatementInstanceSet(object): def __init__( self, - stmt, # a LexScheduleStatement - lex_points, # [string/int, ] + stmt, + lex_points, ): self.stmt = stmt self.lex_points = lex_points From 2f494a2a02d15339ceb7662fee51ae0bcbb2a18e Mon Sep 17 00:00:00 
2001 From: jdsteve2 Date: Mon, 25 May 2020 21:49:33 -0500 Subject: [PATCH 053/460] update more comments to clarify that the lex tuples represent *multiple* points, not a single point --- loopy/schedule/checker/schedule.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/loopy/schedule/checker/schedule.py b/loopy/schedule/checker/schedule.py index aac9b5744..c40053869 100644 --- a/loopy/schedule/checker/schedule.py +++ b/loopy/schedule/checker/schedule.py @@ -166,8 +166,8 @@ def __init__( # go through linearization_items_ordered and generate self.lex_schedule - # keep track of the next point in our lexicographic ordering - # initially this as a 1-d point with value 0 + # keep track of the next tuple of points in our lexicographic + # ordering, initially this as a 1-d point with value 0 next_insn_lex_tuple = [0] stmt_added_since_prev_block_at_tier = [False] next_sid = 0 @@ -265,8 +265,9 @@ def __init__( if self.stmt_instance_before and self.stmt_instance_after: break - # at this point, lex_schedule may contain lex points missing dimensions, - # the values in these missing dims should be zero, so add them + # At this point, lex_schedule may contain lex point tuples + # missing dimensions; the values in these missing dims should + # be zero, so add them. self.pad_lex_tuples_with_zeros() def max_lex_dims(self): @@ -277,9 +278,8 @@ def max_lex_dims(self): def pad_lex_tuples_with_zeros(self): """Find the maximum number of lexicographic dimensions represented in the lexicographic ordering, and if any - :class:`LexScheduleStatement` maps to a point in lexicographic - time with fewer dimensions, add a zero for each of the missing - dimensions. + :class:`LexScheduleStatement` maps to a lex point tuple with + fewer dimensions, add a zero for each of the missing dimensions. 
""" def _pad_lex_tuple_with_zeros(stmt_inst, length): From c71a0efb3182a50ccfad6d346d62abc93088d3f8 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Mon, 25 May 2020 22:13:38 -0500 Subject: [PATCH 054/460] clarify what a LexScheduleStatementInstanceSet is --- loopy/schedule/checker/schedule.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/loopy/schedule/checker/schedule.py b/loopy/schedule/checker/schedule.py index c40053869..c4b4cfc78 100644 --- a/loopy/schedule/checker/schedule.py +++ b/loopy/schedule/checker/schedule.py @@ -73,8 +73,11 @@ def __str__(self): class LexScheduleStatementInstanceSet(object): - """A representation of a set of instances of a - :class:`LexScheduleStatement`. + """A representation of a set of (non-concurrent) instances of a + statement being executed. The ordering of the instances is described + by the `lex_points` attribute, a list representing points in a + lexicographic ordering of statements. Each field in the list + corresponds to a dimension in the lexicographic ordering. .. attribute:: stmt @@ -82,8 +85,9 @@ class LexScheduleStatementInstanceSet(object): .. attribute:: lex_points - A list of :class:`int` or as :class:`str` :mod:`loopy` inames representing - a point or set of points in a lexicographic ordering. + A list containing one value for each dimension in a lexicographic + ordering. These values describe the ordering of the statements, + and may be :class:`str` :mod:`loopy` inames or :class:`int`. 
""" From 492e1f7d66d837076592caeb20813a8df2fe5373 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Tue, 26 May 2020 10:16:32 -0500 Subject: [PATCH 055/460] rename LexScheduleStatement->PairwiseScheduleStatement, get_isl_maps_for_LexSchedule->get_isl_maps_from_PairwiseScheduleBuilder, LexSchedule->PairwiseScheduleBuilder --- loopy/schedule/checker/__init__.py | 32 +++---- loopy/schedule/checker/schedule.py | 42 ++++----- test/test_linearization_checker.py | 132 ++++++++++++++--------------- 3 files changed, 103 insertions(+), 103 deletions(-) diff --git a/loopy/schedule/checker/__init__.py b/loopy/schedule/checker/__init__.py index 176b1399c..1da7b1e16 100644 --- a/loopy/schedule/checker/__init__.py +++ b/loopy/schedule/checker/__init__.py @@ -21,7 +21,7 @@ """ -# {{{ Create LexSchedule for statement pair +# {{{ Create PairwiseScheduleBuilder for statement pair def get_schedule_for_statement_pair( knl, @@ -29,9 +29,9 @@ def get_schedule_for_statement_pair( insn_id_before, insn_id_after, ): - """Create a :class:`loopy.schedule.checker.schedule.LexSchedule` + """Create a :class:`loopy.schedule.checker.schedule.PairwiseScheduleBuilder` representing the order of two statements as a mapping from - :class:`loopy.schedule.checker.LexScheduleStatementInstanceSet` + :class:`loopy.schedule.checker.PairwiseScheduleStatementInstanceSet` to lexicographic time. :arg knl: A :class:`loopy.kernel.LoopKernel` containing the @@ -50,9 +50,9 @@ def get_schedule_for_statement_pair( :arg insn_id_after: An instruction identifier that is unique within a :class:`loopy.kernel.LoopKernel`. - :returns: A :class:`loopy.schedule.checker.schedule.LexSchedule` + :returns: A :class:`loopy.schedule.checker.schedule.PairwiseScheduleBuilder` representing the order of two statements as a mapping from - :class:`loopy.schedule.checker.LexScheduleStatementInstanceSet` + :class:`loopy.schedule.checker.PairwiseScheduleStatementInstanceSet` to lexicographic time. 
""" @@ -62,7 +62,7 @@ def get_schedule_for_statement_pair( # }}} # {{{ Find any EnterLoop inames that are tagged as concurrent - # so that LexSchedule knows to ignore them + # so that PairwiseScheduleBuilder knows to ignore them # (In the future, this shouldn't be necessary because there # won't be any inames with ConcurrentTags in EnterLoop linearization items. # Test which exercises this: test_linearization_checker_with_stroud_bernstein()) @@ -81,10 +81,10 @@ def get_schedule_for_statement_pair( "Ignoring these loops." % (conc_loop_inames, preproc_knl.name)) # }}} - # {{{ Create LexSchedule: mapping of {statement instance: lex point} + # {{{ Create PairwiseScheduleBuilder: mapping of {statement instance: lex point} # include only instructions involved in this dependency - from loopy.schedule.checker.schedule import LexSchedule - return LexSchedule( + from loopy.schedule.checker.schedule import PairwiseScheduleBuilder + return PairwiseScheduleBuilder( linearization_items, insn_id_before, insn_id_after, @@ -95,17 +95,17 @@ def get_schedule_for_statement_pair( # }}} -# {{{ Get isl map pair for LexSchedule +# {{{ Get isl map pair from PairwiseScheduleBuilder -def get_isl_maps_for_LexSchedule(lex_sched, knl): +def get_isl_maps_from_PairwiseScheduleBuilder(lex_sched, knl): """Create a pair of :class:`islpy.Map`s representing a - :class:`loopy.schedule.checker.LexSchedule` as two mappings - from statement instances to lexicographic time, one for - the dependee statement and one for the depender. + sub-schedule as two mappings from statement instances to lexicographic + time, one for the dependee statement and one for the depender. 
- :arg lex_sched: A :class:`loopy.schedule.checker.schedule.LexSchedule` + :arg lex_sched: A + :class:`loopy.schedule.checker.schedule.PairwiseScheduleBuilder` representing the order of two statements as a mapping from - :class:`loopy.schedule.checker.LexScheduleStatementInstanceSet` + :class:`loopy.schedule.checker.PairwiseScheduleStatementInstanceSet` to lexicographic time. :arg knl: A :class:`loopy.kernel.LoopKernel` containing the diff --git a/loopy/schedule/checker/schedule.py b/loopy/schedule/checker/schedule.py index c4b4cfc78..4d80d8945 100644 --- a/loopy/schedule/checker/schedule.py +++ b/loopy/schedule/checker/schedule.py @@ -23,7 +23,7 @@ import islpy as isl -class LexScheduleStatement(object): +class PairwiseScheduleStatement(object): """A representation of a :mod:`loopy` statement. .. attribute:: insn_id @@ -34,11 +34,11 @@ class LexScheduleStatement(object): .. attribute:: int_id A :class:`int` uniquely identifying the statement within a - :class:`LexSchedule`. A :class:`LexSchedule` describes a mapping - from points in a space of statement instances to points in a - lexicographic ordering. The `statement` dimension of a point - in the statement instance space representing an instance of this - statement is assigned this value (`int_id`). + :class:`PairwiseScheduleBuilder`. A :class:`PairwiseScheduleBuilder` + builds a mapping from points in a space of statement instances to + points in a lexicographic ordering. The `statement` dimension of a + point in the statement instance space representing an instance of + this statement is assigned this value (`int_id`). """ @@ -72,7 +72,7 @@ def __str__(self): return "%s%s" % (self.insn_id, int_id) -class LexScheduleStatementInstanceSet(object): +class PairwiseScheduleStatementInstanceSet(object): """A representation of a set of (non-concurrent) instances of a statement being executed. 
The ordering of the instances is described by the `lex_points` attribute, a list representing points in a @@ -81,7 +81,7 @@ class LexScheduleStatementInstanceSet(object): .. attribute:: stmt - A :class:`LexScheduleStatement`. + A :class:`PairwiseScheduleStatement`. .. attribute:: lex_points @@ -103,15 +103,15 @@ def __str__(self): return "{%s, %s}" % (self.stmt, self.lex_points) -class LexSchedule(object): - """Given a pair of statements in a linearized kernel, LexSchedule +class PairwiseScheduleBuilder(object): + """Given a pair of statements in a linearized kernel, PairwiseScheduleBuilder determines the (relative) order in which the instances are executed, by creating a mapping from statement instances to points in a single lexicographic ordering. .. attribute:: stmt_instance_before - A :class:`LexScheduleStatementInstanceSet` describing the dependee + A :class:`PairwiseScheduleStatementInstanceSet` describing the dependee statement's order relative to the depender statment by mapping a statement to a point or set of points in a lexicographic ordering. Points in lexicographic ordering are represented as @@ -119,7 +119,7 @@ class LexSchedule(object): .. attribute:: stmt_instance_after - A :class:`LexScheduleStatementInstanceSet` describing the depender + A :class:`PairwiseScheduleStatementInstanceSet` describing the depender statement's order relative to the dependee statment by mapping a statement to a point or set of points in a lexicographic ordering. Points in lexicographic ordering are represented as @@ -150,7 +150,7 @@ def __init__( ): """ :arg linearization_items_ordered: A list of :class:`ScheduleItem` whose - order will be described by this :class:`LexSchedule`. + order will be described by this :class:`PairwiseScheduleBuilder`. :arg before_insn_id: A :class:`str` instruction id specifying the dependee in this pair of instructions. 
@@ -160,11 +160,11 @@ def __init__( """ - # LexScheduleStatements + # PairwiseScheduleBuilder statements self.stmt_instance_before = None self.stmt_instance_after = None # TODO when/after dependencies are added, consider the possibility - # of removing the two-statements-per-LexSchedule limitation + # of removing the two-statements-per-PairwiseScheduleBuilder limitation from loopy.schedule import (EnterLoop, LeaveLoop, Barrier, RunInstruction) @@ -234,8 +234,8 @@ def __init__( if lp_insn_id == before_insn_id: # add before sched item - self.stmt_instance_before = LexScheduleStatementInstanceSet( - LexScheduleStatement( + self.stmt_instance_before = PairwiseScheduleStatementInstanceSet( + PairwiseScheduleStatement( insn_id=lp_insn_id, int_id=next_sid, # int representing insn ), @@ -244,8 +244,8 @@ def __init__( if lp_insn_id == after_insn_id: # add after sched item - self.stmt_instance_after = LexScheduleStatementInstanceSet( - LexScheduleStatement( + self.stmt_instance_after = PairwiseScheduleStatementInstanceSet( + PairwiseScheduleStatement( insn_id=lp_insn_id, int_id=next_sid, # int representing insn ), @@ -282,12 +282,12 @@ def max_lex_dims(self): def pad_lex_tuples_with_zeros(self): """Find the maximum number of lexicographic dimensions represented in the lexicographic ordering, and if any - :class:`LexScheduleStatement` maps to a lex point tuple with + :class:`PairwiseScheduleStatement` maps to a lex point tuple with fewer dimensions, add a zero for each of the missing dimensions. 
""" def _pad_lex_tuple_with_zeros(stmt_inst, length): - return LexScheduleStatementInstanceSet( + return PairwiseScheduleStatementInstanceSet( stmt_inst.stmt, stmt_inst.lex_points[:] + [0]*(length-len(stmt_inst.lex_points)), ) diff --git a/test/test_linearization_checker.py b/test/test_linearization_checker.py index aab9c8507..a4696c3b2 100644 --- a/test/test_linearization_checker.py +++ b/test/test_linearization_checker.py @@ -50,7 +50,7 @@ def test_lexschedule_and_islmap_creation(): import islpy as isl from loopy.schedule.checker import ( get_schedule_for_statement_pair, - get_isl_maps_for_LexSchedule, + get_isl_maps_from_PairwiseScheduleBuilder, ) from loopy.schedule.checker.utils import ( align_isl_maps_by_var_names, @@ -93,7 +93,7 @@ def test_lexschedule_and_islmap_creation(): knl = get_one_linearized_kernel(knl) linearization_items = knl.linearization - # Create LexSchedule: mapping of {statement instance: lex point} + # Create PairwiseScheduleBuilder: mapping of {statement instance: lex point} sched_ab = get_schedule_for_statement_pair( knl, linearization_items, @@ -136,35 +136,35 @@ def test_lexschedule_and_islmap_creation(): assert sched_ab.stmt_instance_before.lex_points == [0, 'i', 0, 'k', 0] assert sched_ab.stmt_instance_after.lex_points == [0, 'i', 1, 'j', 0] - # Get two isl maps representing the LexSchedule + # Get two isl maps from the PairwiseScheduleBuilder - isl_sched_map_before, isl_sched_map_after = get_isl_maps_for_LexSchedule( + isl_sched_before, isl_sched_after = get_isl_maps_from_PairwiseScheduleBuilder( sched_ab, knl) # Create expected maps, align, compare - isl_sched_map_before_expected = isl.Map( + isl_sched_before_expected = isl.Map( "[pi, pk] -> { " "[_lp_sched_statement=0, i, k] -> " "[_lp_sched_l0=0, _lp_sched_l1=i, _lp_sched_l2=0, _lp_sched_l3=k, " "_lp_sched_l4=0] : " "0 <= i < pi and 0 <= k < pk }" ) - isl_sched_map_before_expected = align_isl_maps_by_var_names( - isl_sched_map_before_expected, isl_sched_map_before) + 
isl_sched_before_expected = align_isl_maps_by_var_names( + isl_sched_before_expected, isl_sched_before) - isl_sched_map_after_expected = isl.Map( + isl_sched_after_expected = isl.Map( "[pi, pj] -> { " "[_lp_sched_statement=1, i, j] -> " "[_lp_sched_l0=0, _lp_sched_l1=i, _lp_sched_l2=1, _lp_sched_l3=j, " "_lp_sched_l4=0] : " "0 <= i < pi and 0 <= j < pj }" ) - isl_sched_map_after_expected = align_isl_maps_by_var_names( - isl_sched_map_after_expected, isl_sched_map_after) + isl_sched_after_expected = align_isl_maps_by_var_names( + isl_sched_after_expected, isl_sched_after) - assert isl_sched_map_before == isl_sched_map_before_expected - assert isl_sched_map_after == isl_sched_map_after_expected + assert isl_sched_before == isl_sched_before_expected + assert isl_sched_after == isl_sched_after_expected # ------------------------------------------------------------------------------ # Relationship between insn_a and insn_c --------------------------------------- @@ -172,35 +172,35 @@ def test_lexschedule_and_islmap_creation(): assert sched_ac.stmt_instance_before.lex_points == [0, 'i', 0, 'k', 0] assert sched_ac.stmt_instance_after.lex_points == [0, 'i', 1, 'j', 0] - # Get two isl maps representing the LexSchedule + # Get two isl maps from the PairwiseScheduleBuilder - isl_sched_map_before, isl_sched_map_after = get_isl_maps_for_LexSchedule( + isl_sched_before, isl_sched_after = get_isl_maps_from_PairwiseScheduleBuilder( sched_ac, knl) # Create expected maps, align, compare - isl_sched_map_before_expected = isl.Map( + isl_sched_before_expected = isl.Map( "[pi, pk] -> { " "[_lp_sched_statement=0, i, k] -> " "[_lp_sched_l0=0, _lp_sched_l1=i, _lp_sched_l2=0, _lp_sched_l3=k, " "_lp_sched_l4=0] : " "0 <= i < pi and 0 <= k < pk }" ) - isl_sched_map_before_expected = align_isl_maps_by_var_names( - isl_sched_map_before_expected, isl_sched_map_before) + isl_sched_before_expected = align_isl_maps_by_var_names( + isl_sched_before_expected, isl_sched_before) - 
isl_sched_map_after_expected = isl.Map( + isl_sched_after_expected = isl.Map( "[pi, pj] -> { " "[_lp_sched_statement=1, i, j] -> " "[_lp_sched_l0=0, _lp_sched_l1=i, _lp_sched_l2=1, _lp_sched_l3=j, " "_lp_sched_l4=0] : " "0 <= i < pi and 0 <= j < pj }" ) - isl_sched_map_after_expected = align_isl_maps_by_var_names( - isl_sched_map_after_expected, isl_sched_map_after) + isl_sched_after_expected = align_isl_maps_by_var_names( + isl_sched_after_expected, isl_sched_after) - assert isl_sched_map_before == isl_sched_map_before_expected - assert isl_sched_map_after == isl_sched_map_after_expected + assert isl_sched_before == isl_sched_before_expected + assert isl_sched_after == isl_sched_after_expected # ------------------------------------------------------------------------------ # Relationship between insn_a and insn_d --------------------------------------- @@ -211,14 +211,14 @@ def perform_insn_ad_checks_with(sid_a, sid_d): assert sched_ad.stmt_instance_before.lex_points == [sid_a, 'i', 0, 'k', 0] assert sched_ad.stmt_instance_after.lex_points == [sid_d, 't', 0, 0, 0] - # Get two isl maps representing the LexSchedule + # Get two isl maps from the PairwiseScheduleBuilder - isl_sched_map_before, isl_sched_map_after = get_isl_maps_for_LexSchedule( - sched_ad, knl) + isl_sched_before, isl_sched_after = \ + get_isl_maps_from_PairwiseScheduleBuilder(sched_ad, knl) # Create expected maps, align, compare - isl_sched_map_before_expected = isl.Map( + isl_sched_before_expected = isl.Map( "[pi, pk] -> { " "[_lp_sched_statement=%d, i, k] -> " "[_lp_sched_l0=%d, _lp_sched_l1=i, _lp_sched_l2=0, _lp_sched_l3=k, " @@ -226,10 +226,10 @@ def perform_insn_ad_checks_with(sid_a, sid_d): "0 <= i < pi and 0 <= k < pk }" % (sid_a, sid_a) ) - isl_sched_map_before_expected = align_isl_maps_by_var_names( - isl_sched_map_before_expected, isl_sched_map_before) + isl_sched_before_expected = align_isl_maps_by_var_names( + isl_sched_before_expected, isl_sched_before) - isl_sched_map_after_expected = 
isl.Map( + isl_sched_after_expected = isl.Map( "[pt] -> { " "[_lp_sched_statement=%d, t] -> " "[_lp_sched_l0=%d, _lp_sched_l1=t, _lp_sched_l2=0, _lp_sched_l3=0, " @@ -237,11 +237,11 @@ def perform_insn_ad_checks_with(sid_a, sid_d): "0 <= t < pt }" % (sid_d, sid_d) ) - isl_sched_map_after_expected = align_isl_maps_by_var_names( - isl_sched_map_after_expected, isl_sched_map_after) + isl_sched_after_expected = align_isl_maps_by_var_names( + isl_sched_after_expected, isl_sched_after) - assert isl_sched_map_before == isl_sched_map_before_expected - assert isl_sched_map_after == isl_sched_map_after_expected + assert isl_sched_before == isl_sched_before_expected + assert isl_sched_after == isl_sched_after_expected if sched_ad.stmt_instance_before.stmt.int_id == 0: perform_insn_ad_checks_with(0, 1) @@ -257,14 +257,14 @@ def perform_insn_bc_checks_with(sid_b, sid_c): assert sched_bc.stmt_instance_before.lex_points == [0, 'i', 0, 'j', sid_b] assert sched_bc.stmt_instance_after.lex_points == [0, 'i', 0, 'j', sid_c] - # Get two isl maps representing the LexSchedule + # Get two isl maps from the PairwiseScheduleBuilder - isl_sched_map_before, isl_sched_map_after = get_isl_maps_for_LexSchedule( - sched_bc, knl) + isl_sched_before, isl_sched_after = \ + get_isl_maps_from_PairwiseScheduleBuilder(sched_bc, knl) # Create expected maps, align, compare - isl_sched_map_before_expected = isl.Map( + isl_sched_before_expected = isl.Map( "[pi, pj] -> { " "[_lp_sched_statement=%d, i, j] -> " "[_lp_sched_l0=0, _lp_sched_l1=i, _lp_sched_l2=0, _lp_sched_l3=j, " @@ -272,10 +272,10 @@ def perform_insn_bc_checks_with(sid_b, sid_c): "0 <= i < pi and 0 <= j < pj }" % (sid_b, sid_b) ) - isl_sched_map_before_expected = align_isl_maps_by_var_names( - isl_sched_map_before_expected, isl_sched_map_before) + isl_sched_before_expected = align_isl_maps_by_var_names( + isl_sched_before_expected, isl_sched_before) - isl_sched_map_after_expected = isl.Map( + isl_sched_after_expected = isl.Map( "[pi, pj] -> { " 
"[_lp_sched_statement=%d, i, j] -> " "[_lp_sched_l0=0, _lp_sched_l1=i, _lp_sched_l2=0, _lp_sched_l3=j, " @@ -283,11 +283,11 @@ def perform_insn_bc_checks_with(sid_b, sid_c): "0 <= i < pi and 0 <= j < pj }" % (sid_c, sid_c) ) - isl_sched_map_after_expected = align_isl_maps_by_var_names( - isl_sched_map_after_expected, isl_sched_map_after) + isl_sched_after_expected = align_isl_maps_by_var_names( + isl_sched_after_expected, isl_sched_after) - assert isl_sched_map_before == isl_sched_map_before_expected - assert isl_sched_map_after == isl_sched_map_after_expected + assert isl_sched_before == isl_sched_before_expected + assert isl_sched_after == isl_sched_after_expected if sched_bc.stmt_instance_before.stmt.int_id == 0: perform_insn_bc_checks_with(0, 1) @@ -303,14 +303,14 @@ def perform_insn_bd_checks_with(sid_b, sid_d): assert sched_bd.stmt_instance_before.lex_points == [sid_b, 'i', 0, 'j', 0] assert sched_bd.stmt_instance_after.lex_points == [sid_d, 't', 0, 0, 0] - # Get two isl maps representing the LexSchedule + # Get two isl maps from the PairwiseScheduleBuilder - isl_sched_map_before, isl_sched_map_after = get_isl_maps_for_LexSchedule( - sched_bd, knl) + isl_sched_before, isl_sched_after = \ + get_isl_maps_from_PairwiseScheduleBuilder(sched_bd, knl) # Create expected maps, align, compare - isl_sched_map_before_expected = isl.Map( + isl_sched_before_expected = isl.Map( "[pi, pj] -> { " "[_lp_sched_statement=%d, i, j] -> " "[_lp_sched_l0=%d, _lp_sched_l1=i, _lp_sched_l2=0, _lp_sched_l3=j, " @@ -318,10 +318,10 @@ def perform_insn_bd_checks_with(sid_b, sid_d): "0 <= i < pi and 0 <= j < pj }" % (sid_b, sid_b) ) - isl_sched_map_before_expected = align_isl_maps_by_var_names( - isl_sched_map_before_expected, isl_sched_map_before) + isl_sched_before_expected = align_isl_maps_by_var_names( + isl_sched_before_expected, isl_sched_before) - isl_sched_map_after_expected = isl.Map( + isl_sched_after_expected = isl.Map( "[pt] -> { " "[_lp_sched_statement=%d, t] -> " 
"[_lp_sched_l0=%d, _lp_sched_l1=t, _lp_sched_l2=0, _lp_sched_l3=0, " @@ -329,11 +329,11 @@ def perform_insn_bd_checks_with(sid_b, sid_d): "0 <= t < pt }" % (sid_d, sid_d) ) - isl_sched_map_after_expected = align_isl_maps_by_var_names( - isl_sched_map_after_expected, isl_sched_map_after) + isl_sched_after_expected = align_isl_maps_by_var_names( + isl_sched_after_expected, isl_sched_after) - assert isl_sched_map_before == isl_sched_map_before_expected - assert isl_sched_map_after == isl_sched_map_after_expected + assert isl_sched_before == isl_sched_before_expected + assert isl_sched_after == isl_sched_after_expected if sched_bd.stmt_instance_before.stmt.int_id == 0: perform_insn_bd_checks_with(0, 1) @@ -349,14 +349,14 @@ def perform_insn_cd_checks_with(sid_c, sid_d): assert sched_cd.stmt_instance_before.lex_points == [sid_c, 'i', 0, 'j', 0] assert sched_cd.stmt_instance_after.lex_points == [sid_d, 't', 0, 0, 0] - # Get two isl maps representing the LexSchedule + # Get two isl maps from the PairwiseScheduleBuilder - isl_sched_map_before, isl_sched_map_after = get_isl_maps_for_LexSchedule( - sched_cd, knl) + isl_sched_before, isl_sched_after = \ + get_isl_maps_from_PairwiseScheduleBuilder(sched_cd, knl) # Create expected maps, align, compare - isl_sched_map_before_expected = isl.Map( + isl_sched_before_expected = isl.Map( "[pi, pj] -> { " "[_lp_sched_statement=%d, i, j] -> " "[_lp_sched_l0=%d, _lp_sched_l1=i, _lp_sched_l2=0, _lp_sched_l3=j, " @@ -364,10 +364,10 @@ def perform_insn_cd_checks_with(sid_c, sid_d): "0 <= i < pi and 0 <= j < pj }" % (sid_c, sid_c) ) - isl_sched_map_before_expected = align_isl_maps_by_var_names( - isl_sched_map_before_expected, isl_sched_map_before) + isl_sched_before_expected = align_isl_maps_by_var_names( + isl_sched_before_expected, isl_sched_before) - isl_sched_map_after_expected = isl.Map( + isl_sched_after_expected = isl.Map( "[pt] -> { " "[_lp_sched_statement=%d, t] -> " "[_lp_sched_l0=%d, _lp_sched_l1=t, _lp_sched_l2=0, 
_lp_sched_l3=0, " @@ -375,11 +375,11 @@ def perform_insn_cd_checks_with(sid_c, sid_d): "0 <= t < pt }" % (sid_d, sid_d) ) - isl_sched_map_after_expected = align_isl_maps_by_var_names( - isl_sched_map_after_expected, isl_sched_map_after) + isl_sched_after_expected = align_isl_maps_by_var_names( + isl_sched_after_expected, isl_sched_after) - assert isl_sched_map_before == isl_sched_map_before_expected - assert isl_sched_map_after == isl_sched_map_after_expected + assert isl_sched_before == isl_sched_before_expected + assert isl_sched_after == isl_sched_after_expected if sched_cd.stmt_instance_before.stmt.int_id == 0: perform_insn_cd_checks_with(0, 1) From e5146b0cf846184562eac98a05f10fb35650d49c Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Tue, 26 May 2020 10:22:29 -0500 Subject: [PATCH 056/460] change a few variable names to be consistent with name changes for LexSchedule --- loopy/schedule/checker/__init__.py | 12 +++++++----- loopy/schedule/checker/schedule.py | 4 ++-- 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/loopy/schedule/checker/__init__.py b/loopy/schedule/checker/__init__.py index 1da7b1e16..4ce370b4c 100644 --- a/loopy/schedule/checker/__init__.py +++ b/loopy/schedule/checker/__init__.py @@ -97,12 +97,12 @@ def get_schedule_for_statement_pair( # {{{ Get isl map pair from PairwiseScheduleBuilder -def get_isl_maps_from_PairwiseScheduleBuilder(lex_sched, knl): +def get_isl_maps_from_PairwiseScheduleBuilder(sched_builder, knl): """Create a pair of :class:`islpy.Map`s representing a sub-schedule as two mappings from statement instances to lexicographic time, one for the dependee statement and one for the depender. 
- :arg lex_sched: A + :arg sched_builder: A :class:`loopy.schedule.checker.schedule.PairwiseScheduleBuilder` representing the order of two statements as a mapping from :class:`loopy.schedule.checker.PairwiseScheduleStatementInstanceSet` @@ -119,13 +119,15 @@ def get_isl_maps_from_PairwiseScheduleBuilder(lex_sched, knl): # {{{ Get iname domains dom_before = knl.get_inames_domain( - knl.id_to_insn[lex_sched.stmt_instance_before.stmt.insn_id].within_inames) + knl.id_to_insn[ + sched_builder.stmt_instance_before.stmt.insn_id].within_inames) dom_after = knl.get_inames_domain( - knl.id_to_insn[lex_sched.stmt_instance_after.stmt.insn_id].within_inames) + knl.id_to_insn[ + sched_builder.stmt_instance_after.stmt.insn_id].within_inames) # }}} # {{{ Get isl maps - return lex_sched.create_isl_maps(dom_before, dom_after) + return sched_builder.create_isl_maps(dom_before, dom_after) # }}} # }}} diff --git a/loopy/schedule/checker/schedule.py b/loopy/schedule/checker/schedule.py index 4d80d8945..aaef5de8e 100644 --- a/loopy/schedule/checker/schedule.py +++ b/loopy/schedule/checker/schedule.py @@ -168,7 +168,7 @@ def __init__( from loopy.schedule import (EnterLoop, LeaveLoop, Barrier, RunInstruction) - # go through linearization_items_ordered and generate self.lex_schedule + # go through linearization_items_ordered and generate pairwise sub-schedule # keep track of the next tuple of points in our lexicographic # ordering, initially this as a 1-d point with value 0 @@ -269,7 +269,7 @@ def __init__( if self.stmt_instance_before and self.stmt_instance_after: break - # At this point, lex_schedule may contain lex point tuples + # At this point, pairwise sub-schedule may contain lex point tuples # missing dimensions; the values in these missing dims should # be zero, so add them. 
self.pad_lex_tuples_with_zeros() From 3b5d4caa5a5f1e272172370f949bcd19a54d9b0a Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Tue, 26 May 2020 10:27:36 -0500 Subject: [PATCH 057/460] rename LexScheduleStatement->PairwiseScheduleStatement, get_isl_maps_for_LexSchedule->get_isl_maps_from_PairwiseScheduleBuilder, LexSchedule->PairwiseScheduleBuilder; also rename other variables for consistency --- test/test_linearization_checker.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/test/test_linearization_checker.py b/test/test_linearization_checker.py index df40c1dd5..255d2b0a6 100644 --- a/test/test_linearization_checker.py +++ b/test/test_linearization_checker.py @@ -46,9 +46,9 @@ faulthandler.enable() -# {{{ test LexSchedule and isl map creation +# {{{ test PairwiseScheduleBuilder and isl map creation -def test_lexschedule_and_islmap_creation(): +def test_pairwise_schedule_and_islmap_creation(): import islpy as isl from loopy.schedule.checker import ( get_schedule_for_statement_pair, @@ -397,7 +397,7 @@ def test_statement_instance_ordering_creation(): import islpy as isl from loopy.schedule.checker import ( get_schedule_for_statement_pair, - get_isl_maps_for_LexSchedule, + get_isl_maps_from_PairwiseScheduleBuilder, ) from loopy.schedule.checker.utils import ( align_isl_maps_by_var_names, @@ -451,19 +451,19 @@ def check_sio_for_insn_pair( expected_sio, ): - lex_sched = get_schedule_for_statement_pair( + sched_builder = get_schedule_for_statement_pair( knl, linearization_items, insn_id_before, insn_id_after, ) - # Get two isl maps representing the LexSchedule - isl_sched_map_before, isl_sched_map_after = get_isl_maps_for_LexSchedule( - lex_sched, knl) + # Get two isl maps from the PairwiseScheduleBuilder + isl_sched_map_before, isl_sched_map_after = \ + get_isl_maps_from_PairwiseScheduleBuilder(sched_builder, knl) # get map representing lexicographic ordering - sched_lex_order_map = lex_sched.get_lex_order_map_for_sched_space() + 
sched_lex_order_map = sched_builder.get_lex_order_map_for_sched_space() assert sched_lex_order_map == expected_lex_order_map From d06dd8980c6b39588cc63835bb040580f4dc764d Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Tue, 26 May 2020 10:53:52 -0500 Subject: [PATCH 058/460] remove dependee/depender language for now --- loopy/schedule/checker/schedule.py | 46 ++++++++++++++++-------------- 1 file changed, 24 insertions(+), 22 deletions(-) diff --git a/loopy/schedule/checker/schedule.py b/loopy/schedule/checker/schedule.py index aaef5de8e..27b76c847 100644 --- a/loopy/schedule/checker/schedule.py +++ b/loopy/schedule/checker/schedule.py @@ -111,19 +111,21 @@ class PairwiseScheduleBuilder(object): .. attribute:: stmt_instance_before - A :class:`PairwiseScheduleStatementInstanceSet` describing the dependee - statement's order relative to the depender statment by mapping - a statement to a point or set of points in a lexicographic - ordering. Points in lexicographic ordering are represented as - a list of :class:`int` or as :class:`str` :mod:`loopy` inames. + A :class:`PairwiseScheduleStatementInstanceSet` whose ordering relative + to `stmt_instance_after is described by PairwiseScheduleBuilder. This + is achieved by mapping the statement instances in both sets to points + in a single lexicographic ordering. Points in lexicographic ordering + are represented as a list of :class:`int` or as :class:`str` + :mod:`loopy` inames. .. attribute:: stmt_instance_after - A :class:`PairwiseScheduleStatementInstanceSet` describing the depender - statement's order relative to the dependee statment by mapping - a statement to a point or set of points in a lexicographic - ordering. Points in lexicographic ordering are represented as - a list of :class:`int` or as :class:`str` :mod:`loopy` inames. + A :class:`PairwiseScheduleStatementInstanceSet` whose ordering relative + to `stmt_instance_before is described by PairwiseScheduleBuilder. 
This + is achieved by mapping the statement instances in both sets to points + in a single lexicographic ordering. Points in lexicographic ordering + are represented as a list of :class:`int` or as :class:`str` + :mod:`loopy` inames. .. attribute:: statement_var_name @@ -153,10 +155,10 @@ def __init__( order will be described by this :class:`PairwiseScheduleBuilder`. :arg before_insn_id: A :class:`str` instruction id specifying - the dependee in this pair of instructions. + stmt_instance_before in this pair of instructions. :arg after_insn_id: A :class:`str` instruction id specifying - the depender in this pair of instructions. + stmt_instancce_after in this pair of instructions. """ @@ -308,30 +310,30 @@ def create_isl_maps( ): """Create two isl maps representing lex schedule as two mappings from statement instances to lexicographic time, one for - the dependee and one for the depender. + ``stmt_instance_before`` and one for ``stmt_instance_after``. :arg dom_before: A :class:`islpy.BasicSet` representing the - domain for the dependee statement. + domain for ``stmt_instance_before``. :arg dom_after: A :class:`islpy.BasicSet` representing the - domain for the dependee statement. + domain for ``stmt_instance_after``. :arg dom_inames_ordered_before: A list of :class:`str` - representing the union of inames used in instances of the - dependee statement. ``statement_var_name`` and + representing the union of inames used in + ``stmt_instance_before``. ``statement_var_name`` and ``dom_inames_ordered_before`` are the names of the dims of - the space of the ISL map domain for the dependee. + the space of the ISL map domain. :arg dom_inames_ordered_after: A list of :class:`str` - representing the union of inames used in instances of the - depender statement. ``statement_var_name`` and + representing the union of inames used in + ``stmt_instance_after``. 
``statement_var_name`` and ``dom_inames_ordered_after`` are the names of the dims of - the space of the ISL map domain for the depender. + the space of the ISL map domain. :returns: A two-tuple containing two :class:`islpy.Map`s representing the schedule as two mappings from statement instances to lexicographic time, one for - the dependee and one for the depender. + each of the two :class:`PairwiseScheduleStatementInstanceSet`s. """ From 2574becdadd587f37deb26bad7131610bb50188c Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Sun, 31 May 2020 21:10:07 -0500 Subject: [PATCH 059/460] change identifier prefix for sched checker identifiers from _lp_sched_->lp_linchk_ --- loopy/schedule/checker/schedule.py | 4 +- test/test_linearization_checker.py | 72 +++++++++++++++--------------- 2 files changed, 38 insertions(+), 38 deletions(-) diff --git a/loopy/schedule/checker/schedule.py b/loopy/schedule/checker/schedule.py index 27b76c847..b3f21a6c3 100644 --- a/loopy/schedule/checker/schedule.py +++ b/loopy/schedule/checker/schedule.py @@ -140,8 +140,8 @@ class PairwiseScheduleBuilder(object): """ - statement_var_name = "_lp_sched_statement" - lex_var_prefix = "_lp_sched_l" + statement_var_name = "_lp_linchk_statement" + lex_var_prefix = "_lp_linchk_l" def __init__( self, diff --git a/test/test_linearization_checker.py b/test/test_linearization_checker.py index a4696c3b2..7a67ab824 100644 --- a/test/test_linearization_checker.py +++ b/test/test_linearization_checker.py @@ -145,9 +145,9 @@ def test_lexschedule_and_islmap_creation(): isl_sched_before_expected = isl.Map( "[pi, pk] -> { " - "[_lp_sched_statement=0, i, k] -> " - "[_lp_sched_l0=0, _lp_sched_l1=i, _lp_sched_l2=0, _lp_sched_l3=k, " - "_lp_sched_l4=0] : " + "[_lp_linchk_statement=0, i, k] -> " + "[_lp_linchk_l0=0, _lp_linchk_l1=i, _lp_linchk_l2=0, _lp_linchk_l3=k, " + "_lp_linchk_l4=0] : " "0 <= i < pi and 0 <= k < pk }" ) isl_sched_before_expected = align_isl_maps_by_var_names( @@ -155,9 +155,9 @@ def 
test_lexschedule_and_islmap_creation(): isl_sched_after_expected = isl.Map( "[pi, pj] -> { " - "[_lp_sched_statement=1, i, j] -> " - "[_lp_sched_l0=0, _lp_sched_l1=i, _lp_sched_l2=1, _lp_sched_l3=j, " - "_lp_sched_l4=0] : " + "[_lp_linchk_statement=1, i, j] -> " + "[_lp_linchk_l0=0, _lp_linchk_l1=i, _lp_linchk_l2=1, _lp_linchk_l3=j, " + "_lp_linchk_l4=0] : " "0 <= i < pi and 0 <= j < pj }" ) isl_sched_after_expected = align_isl_maps_by_var_names( @@ -181,9 +181,9 @@ def test_lexschedule_and_islmap_creation(): isl_sched_before_expected = isl.Map( "[pi, pk] -> { " - "[_lp_sched_statement=0, i, k] -> " - "[_lp_sched_l0=0, _lp_sched_l1=i, _lp_sched_l2=0, _lp_sched_l3=k, " - "_lp_sched_l4=0] : " + "[_lp_linchk_statement=0, i, k] -> " + "[_lp_linchk_l0=0, _lp_linchk_l1=i, _lp_linchk_l2=0, _lp_linchk_l3=k, " + "_lp_linchk_l4=0] : " "0 <= i < pi and 0 <= k < pk }" ) isl_sched_before_expected = align_isl_maps_by_var_names( @@ -191,9 +191,9 @@ def test_lexschedule_and_islmap_creation(): isl_sched_after_expected = isl.Map( "[pi, pj] -> { " - "[_lp_sched_statement=1, i, j] -> " - "[_lp_sched_l0=0, _lp_sched_l1=i, _lp_sched_l2=1, _lp_sched_l3=j, " - "_lp_sched_l4=0] : " + "[_lp_linchk_statement=1, i, j] -> " + "[_lp_linchk_l0=0, _lp_linchk_l1=i, _lp_linchk_l2=1, _lp_linchk_l3=j, " + "_lp_linchk_l4=0] : " "0 <= i < pi and 0 <= j < pj }" ) isl_sched_after_expected = align_isl_maps_by_var_names( @@ -220,9 +220,9 @@ def perform_insn_ad_checks_with(sid_a, sid_d): isl_sched_before_expected = isl.Map( "[pi, pk] -> { " - "[_lp_sched_statement=%d, i, k] -> " - "[_lp_sched_l0=%d, _lp_sched_l1=i, _lp_sched_l2=0, _lp_sched_l3=k, " - "_lp_sched_l4=0] : " + "[_lp_linchk_statement=%d, i, k] -> " + "[_lp_linchk_l0=%d, _lp_linchk_l1=i, _lp_linchk_l2=0, _lp_linchk_l3=k, " + "_lp_linchk_l4=0] : " "0 <= i < pi and 0 <= k < pk }" % (sid_a, sid_a) ) @@ -231,9 +231,9 @@ def perform_insn_ad_checks_with(sid_a, sid_d): isl_sched_after_expected = isl.Map( "[pt] -> { " - "[_lp_sched_statement=%d, t] -> " 
- "[_lp_sched_l0=%d, _lp_sched_l1=t, _lp_sched_l2=0, _lp_sched_l3=0, " - "_lp_sched_l4=0] : " + "[_lp_linchk_statement=%d, t] -> " + "[_lp_linchk_l0=%d, _lp_linchk_l1=t, _lp_linchk_l2=0, _lp_linchk_l3=0, " + "_lp_linchk_l4=0] : " "0 <= t < pt }" % (sid_d, sid_d) ) @@ -266,9 +266,9 @@ def perform_insn_bc_checks_with(sid_b, sid_c): isl_sched_before_expected = isl.Map( "[pi, pj] -> { " - "[_lp_sched_statement=%d, i, j] -> " - "[_lp_sched_l0=0, _lp_sched_l1=i, _lp_sched_l2=0, _lp_sched_l3=j, " - "_lp_sched_l4=%d] : " + "[_lp_linchk_statement=%d, i, j] -> " + "[_lp_linchk_l0=0, _lp_linchk_l1=i, _lp_linchk_l2=0, _lp_linchk_l3=j, " + "_lp_linchk_l4=%d] : " "0 <= i < pi and 0 <= j < pj }" % (sid_b, sid_b) ) @@ -277,9 +277,9 @@ def perform_insn_bc_checks_with(sid_b, sid_c): isl_sched_after_expected = isl.Map( "[pi, pj] -> { " - "[_lp_sched_statement=%d, i, j] -> " - "[_lp_sched_l0=0, _lp_sched_l1=i, _lp_sched_l2=0, _lp_sched_l3=j, " - "_lp_sched_l4=%d] : " + "[_lp_linchk_statement=%d, i, j] -> " + "[_lp_linchk_l0=0, _lp_linchk_l1=i, _lp_linchk_l2=0, _lp_linchk_l3=j, " + "_lp_linchk_l4=%d] : " "0 <= i < pi and 0 <= j < pj }" % (sid_c, sid_c) ) @@ -312,9 +312,9 @@ def perform_insn_bd_checks_with(sid_b, sid_d): isl_sched_before_expected = isl.Map( "[pi, pj] -> { " - "[_lp_sched_statement=%d, i, j] -> " - "[_lp_sched_l0=%d, _lp_sched_l1=i, _lp_sched_l2=0, _lp_sched_l3=j, " - "_lp_sched_l4=0] : " + "[_lp_linchk_statement=%d, i, j] -> " + "[_lp_linchk_l0=%d, _lp_linchk_l1=i, _lp_linchk_l2=0, _lp_linchk_l3=j, " + "_lp_linchk_l4=0] : " "0 <= i < pi and 0 <= j < pj }" % (sid_b, sid_b) ) @@ -323,9 +323,9 @@ def perform_insn_bd_checks_with(sid_b, sid_d): isl_sched_after_expected = isl.Map( "[pt] -> { " - "[_lp_sched_statement=%d, t] -> " - "[_lp_sched_l0=%d, _lp_sched_l1=t, _lp_sched_l2=0, _lp_sched_l3=0, " - "_lp_sched_l4=0] : " + "[_lp_linchk_statement=%d, t] -> " + "[_lp_linchk_l0=%d, _lp_linchk_l1=t, _lp_linchk_l2=0, _lp_linchk_l3=0, " + "_lp_linchk_l4=0] : " "0 <= t < pt }" % 
(sid_d, sid_d) ) @@ -358,9 +358,9 @@ def perform_insn_cd_checks_with(sid_c, sid_d): isl_sched_before_expected = isl.Map( "[pi, pj] -> { " - "[_lp_sched_statement=%d, i, j] -> " - "[_lp_sched_l0=%d, _lp_sched_l1=i, _lp_sched_l2=0, _lp_sched_l3=j, " - "_lp_sched_l4=0] : " + "[_lp_linchk_statement=%d, i, j] -> " + "[_lp_linchk_l0=%d, _lp_linchk_l1=i, _lp_linchk_l2=0, _lp_linchk_l3=j, " + "_lp_linchk_l4=0] : " "0 <= i < pi and 0 <= j < pj }" % (sid_c, sid_c) ) @@ -369,9 +369,9 @@ def perform_insn_cd_checks_with(sid_c, sid_d): isl_sched_after_expected = isl.Map( "[pt] -> { " - "[_lp_sched_statement=%d, t] -> " - "[_lp_sched_l0=%d, _lp_sched_l1=t, _lp_sched_l2=0, _lp_sched_l3=0, " - "_lp_sched_l4=0] : " + "[_lp_linchk_statement=%d, t] -> " + "[_lp_linchk_l0=%d, _lp_linchk_l1=t, _lp_linchk_l2=0, _lp_linchk_l3=0, " + "_lp_linchk_l4=0] : " "0 <= t < pt }" % (sid_d, sid_d) ) From 626140b832828432fd5e78b1511b5054d5819214 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Sun, 31 May 2020 21:17:29 -0500 Subject: [PATCH 060/460] rename PairwiseScheduleStatementInstanceSet->StatementInstanceSet --- loopy/schedule/checker/__init__.py | 6 +++--- loopy/schedule/checker/schedule.py | 14 +++++++------- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/loopy/schedule/checker/__init__.py b/loopy/schedule/checker/__init__.py index 4ce370b4c..6769b56b7 100644 --- a/loopy/schedule/checker/__init__.py +++ b/loopy/schedule/checker/__init__.py @@ -31,7 +31,7 @@ def get_schedule_for_statement_pair( ): """Create a :class:`loopy.schedule.checker.schedule.PairwiseScheduleBuilder` representing the order of two statements as a mapping from - :class:`loopy.schedule.checker.PairwiseScheduleStatementInstanceSet` + :class:`loopy.schedule.checker.StatementInstanceSet` to lexicographic time. 
:arg knl: A :class:`loopy.kernel.LoopKernel` containing the @@ -52,7 +52,7 @@ def get_schedule_for_statement_pair( :returns: A :class:`loopy.schedule.checker.schedule.PairwiseScheduleBuilder` representing the order of two statements as a mapping from - :class:`loopy.schedule.checker.PairwiseScheduleStatementInstanceSet` + :class:`loopy.schedule.checker.StatementInstanceSet` to lexicographic time. """ @@ -105,7 +105,7 @@ def get_isl_maps_from_PairwiseScheduleBuilder(sched_builder, knl): :arg sched_builder: A :class:`loopy.schedule.checker.schedule.PairwiseScheduleBuilder` representing the order of two statements as a mapping from - :class:`loopy.schedule.checker.PairwiseScheduleStatementInstanceSet` + :class:`loopy.schedule.checker.StatementInstanceSet` to lexicographic time. :arg knl: A :class:`loopy.kernel.LoopKernel` containing the diff --git a/loopy/schedule/checker/schedule.py b/loopy/schedule/checker/schedule.py index b3f21a6c3..8f55eff39 100644 --- a/loopy/schedule/checker/schedule.py +++ b/loopy/schedule/checker/schedule.py @@ -72,7 +72,7 @@ def __str__(self): return "%s%s" % (self.insn_id, int_id) -class PairwiseScheduleStatementInstanceSet(object): +class StatementInstanceSet(object): """A representation of a set of (non-concurrent) instances of a statement being executed. The ordering of the instances is described by the `lex_points` attribute, a list representing points in a @@ -111,7 +111,7 @@ class PairwiseScheduleBuilder(object): .. attribute:: stmt_instance_before - A :class:`PairwiseScheduleStatementInstanceSet` whose ordering relative + A :class:`StatementInstanceSet` whose ordering relative to `stmt_instance_after is described by PairwiseScheduleBuilder. This is achieved by mapping the statement instances in both sets to points in a single lexicographic ordering. Points in lexicographic ordering @@ -120,7 +120,7 @@ class PairwiseScheduleBuilder(object): .. 
attribute:: stmt_instance_after - A :class:`PairwiseScheduleStatementInstanceSet` whose ordering relative + A :class:`StatementInstanceSet` whose ordering relative to `stmt_instance_before is described by PairwiseScheduleBuilder. This is achieved by mapping the statement instances in both sets to points in a single lexicographic ordering. Points in lexicographic ordering @@ -236,7 +236,7 @@ def __init__( if lp_insn_id == before_insn_id: # add before sched item - self.stmt_instance_before = PairwiseScheduleStatementInstanceSet( + self.stmt_instance_before = StatementInstanceSet( PairwiseScheduleStatement( insn_id=lp_insn_id, int_id=next_sid, # int representing insn @@ -246,7 +246,7 @@ def __init__( if lp_insn_id == after_insn_id: # add after sched item - self.stmt_instance_after = PairwiseScheduleStatementInstanceSet( + self.stmt_instance_after = StatementInstanceSet( PairwiseScheduleStatement( insn_id=lp_insn_id, int_id=next_sid, # int representing insn @@ -289,7 +289,7 @@ def pad_lex_tuples_with_zeros(self): """ def _pad_lex_tuple_with_zeros(stmt_inst, length): - return PairwiseScheduleStatementInstanceSet( + return StatementInstanceSet( stmt_inst.stmt, stmt_inst.lex_points[:] + [0]*(length-len(stmt_inst.lex_points)), ) @@ -333,7 +333,7 @@ def create_isl_maps( :returns: A two-tuple containing two :class:`islpy.Map`s representing the schedule as two mappings from statement instances to lexicographic time, one for - each of the two :class:`PairwiseScheduleStatementInstanceSet`s. + each of the two :class:`StatementInstanceSet`s. 
""" From 132a1c699a6802714aa18a0c2031c9e469d94c77 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Sun, 31 May 2020 21:23:02 -0500 Subject: [PATCH 061/460] rename PairwiseScheduleStatement->StatementRef --- loopy/schedule/checker/schedule.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/loopy/schedule/checker/schedule.py b/loopy/schedule/checker/schedule.py index 8f55eff39..d80b64520 100644 --- a/loopy/schedule/checker/schedule.py +++ b/loopy/schedule/checker/schedule.py @@ -23,8 +23,8 @@ import islpy as isl -class PairwiseScheduleStatement(object): - """A representation of a :mod:`loopy` statement. +class StatementRef(object): + """A reference to a :mod:`loopy` statement. .. attribute:: insn_id @@ -81,7 +81,7 @@ class StatementInstanceSet(object): .. attribute:: stmt - A :class:`PairwiseScheduleStatement`. + A :class:`StatementRef`. .. attribute:: lex_points @@ -237,7 +237,7 @@ def __init__( if lp_insn_id == before_insn_id: # add before sched item self.stmt_instance_before = StatementInstanceSet( - PairwiseScheduleStatement( + StatementRef( insn_id=lp_insn_id, int_id=next_sid, # int representing insn ), @@ -247,7 +247,7 @@ def __init__( if lp_insn_id == after_insn_id: # add after sched item self.stmt_instance_after = StatementInstanceSet( - PairwiseScheduleStatement( + StatementRef( insn_id=lp_insn_id, int_id=next_sid, # int representing insn ), @@ -284,7 +284,7 @@ def max_lex_dims(self): def pad_lex_tuples_with_zeros(self): """Find the maximum number of lexicographic dimensions represented in the lexicographic ordering, and if any - :class:`PairwiseScheduleStatement` maps to a lex point tuple with + :class:`StatementRef` maps to a lex point tuple with fewer dimensions, add a zero for each of the missing dimensions. 
""" From 7a9a5bde0be356d90a012a1fab018c8d31520514 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Sun, 31 May 2020 21:28:23 -0500 Subject: [PATCH 062/460] rename StatementInstanceSet.stmt->StatementInstanceSet.stmt_ref --- loopy/schedule/checker/__init__.py | 4 ++-- loopy/schedule/checker/schedule.py | 14 +++++++------- test/test_linearization_checker.py | 8 ++++---- 3 files changed, 13 insertions(+), 13 deletions(-) diff --git a/loopy/schedule/checker/__init__.py b/loopy/schedule/checker/__init__.py index 6769b56b7..7729dbbb1 100644 --- a/loopy/schedule/checker/__init__.py +++ b/loopy/schedule/checker/__init__.py @@ -120,10 +120,10 @@ def get_isl_maps_from_PairwiseScheduleBuilder(sched_builder, knl): # {{{ Get iname domains dom_before = knl.get_inames_domain( knl.id_to_insn[ - sched_builder.stmt_instance_before.stmt.insn_id].within_inames) + sched_builder.stmt_instance_before.stmt_ref.insn_id].within_inames) dom_after = knl.get_inames_domain( knl.id_to_insn[ - sched_builder.stmt_instance_after.stmt.insn_id].within_inames) + sched_builder.stmt_instance_after.stmt_ref.insn_id].within_inames) # }}} # {{{ Get isl maps diff --git a/loopy/schedule/checker/schedule.py b/loopy/schedule/checker/schedule.py index d80b64520..8963fb576 100644 --- a/loopy/schedule/checker/schedule.py +++ b/loopy/schedule/checker/schedule.py @@ -79,7 +79,7 @@ class StatementInstanceSet(object): lexicographic ordering of statements. Each field in the list corresponds to a dimension in the lexicographic ordering. - .. attribute:: stmt + .. attribute:: stmt_ref A :class:`StatementRef`. 
@@ -93,14 +93,14 @@ class StatementInstanceSet(object): def __init__( self, - stmt, + stmt_ref, lex_points, ): - self.stmt = stmt + self.stmt_ref = stmt_ref self.lex_points = lex_points def __str__(self): - return "{%s, %s}" % (self.stmt, self.lex_points) + return "{%s, %s}" % (self.stmt_ref, self.lex_points) class PairwiseScheduleBuilder(object): @@ -290,7 +290,7 @@ def pad_lex_tuples_with_zeros(self): def _pad_lex_tuple_with_zeros(stmt_inst, length): return StatementInstanceSet( - stmt_inst.stmt, + stmt_inst.stmt_ref, stmt_inst.lex_points[:] + [0]*(length-len(stmt_inst.lex_points)), ) @@ -372,7 +372,7 @@ def _get_isl_map_for_stmt_inst( # Right now, statement instance tuples consist of single int. # Add all inames from domains to each map domain tuple. tuple_pair = [( - (stmt_inst.stmt.int_id, ) + tuple(dom_inames_ordered), + (stmt_inst.stmt_ref.int_id, ) + tuple(dom_inames_ordered), stmt_inst.lex_points )] @@ -402,7 +402,7 @@ def __str__(self): def stringify_sched_stmt_instance(stmt_inst): return "{\n[%s=%s,] -> %s;\n}" % ( self.statement_var_name, - stmt_inst.stmt.int_id, + stmt_inst.stmt_ref.int_id, stmt_inst.lex_points) return "Before: %s\nAfter: %s" % ( diff --git a/test/test_linearization_checker.py b/test/test_linearization_checker.py index 7a67ab824..396fccf4f 100644 --- a/test/test_linearization_checker.py +++ b/test/test_linearization_checker.py @@ -243,7 +243,7 @@ def perform_insn_ad_checks_with(sid_a, sid_d): assert isl_sched_before == isl_sched_before_expected assert isl_sched_after == isl_sched_after_expected - if sched_ad.stmt_instance_before.stmt.int_id == 0: + if sched_ad.stmt_instance_before.stmt_ref.int_id == 0: perform_insn_ad_checks_with(0, 1) else: perform_insn_ad_checks_with(1, 0) @@ -289,7 +289,7 @@ def perform_insn_bc_checks_with(sid_b, sid_c): assert isl_sched_before == isl_sched_before_expected assert isl_sched_after == isl_sched_after_expected - if sched_bc.stmt_instance_before.stmt.int_id == 0: + if 
sched_bc.stmt_instance_before.stmt_ref.int_id == 0: perform_insn_bc_checks_with(0, 1) else: perform_insn_bc_checks_with(1, 0) @@ -335,7 +335,7 @@ def perform_insn_bd_checks_with(sid_b, sid_d): assert isl_sched_before == isl_sched_before_expected assert isl_sched_after == isl_sched_after_expected - if sched_bd.stmt_instance_before.stmt.int_id == 0: + if sched_bd.stmt_instance_before.stmt_ref.int_id == 0: perform_insn_bd_checks_with(0, 1) else: perform_insn_bd_checks_with(1, 0) @@ -381,7 +381,7 @@ def perform_insn_cd_checks_with(sid_c, sid_d): assert isl_sched_before == isl_sched_before_expected assert isl_sched_after == isl_sched_after_expected - if sched_cd.stmt_instance_before.stmt.int_id == 0: + if sched_cd.stmt_instance_before.stmt_ref.int_id == 0: perform_insn_cd_checks_with(0, 1) else: perform_insn_cd_checks_with(1, 0) From 30054b19b0751045e4515a95420505cc57f99e3d Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Sun, 31 May 2020 21:34:58 -0500 Subject: [PATCH 063/460] keep docstring example consistent with identifier naming policy --- loopy/schedule/checker/schedule.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/loopy/schedule/checker/schedule.py b/loopy/schedule/checker/schedule.py index 8963fb576..26ceb49c4 100644 --- a/loopy/schedule/checker/schedule.py +++ b/loopy/schedule/checker/schedule.py @@ -136,7 +136,11 @@ class PairwiseScheduleBuilder(object): A :class:`str` specifying the prefix to be used for the variables representing the dimensions in the lexicographic ordering. E.g., - a prefix of "lex" might yield variables "lex0", "lex1", "lex2". + a prefix of "_lp_linchk_lex" might yield variables "_lp_linchk_lex0", + "_lp_linchk_lex1", "_lp_linchk_lex2". Note the identifier prefix + policies described in the documentation under + *Loopy's Model of a Kernel* -> *Identifiers*. + . 
""" From a9bbc92e840fe9deab63be773141d2be0bbabb17 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Sun, 31 May 2020 22:25:13 -0500 Subject: [PATCH 064/460] add example usage to docstring for get_schedule_for_statement_pair() and PairwiseScheduleBuilder() --- loopy/schedule/checker/__init__.py | 39 ++++++++++++++++++++++- loopy/schedule/checker/schedule.py | 50 +++++++++++++++++++++++++++++- 2 files changed, 87 insertions(+), 2 deletions(-) diff --git a/loopy/schedule/checker/__init__.py b/loopy/schedule/checker/__init__.py index 7729dbbb1..c454254f6 100644 --- a/loopy/schedule/checker/__init__.py +++ b/loopy/schedule/checker/__init__.py @@ -40,7 +40,7 @@ def get_schedule_for_statement_pair( :arg linearization_items: A list of :class:`loopy.schedule.ScheduleItem` (to be renamed to `loopy.schedule.LinearizationItem`) containing the two linearization items for which a schedule will be - created. This list may be a partial linearization for a + created. This list may be a *partial* linearization for a kernel since this function may be used during the linearization process. @@ -54,6 +54,43 @@ def get_schedule_for_statement_pair( representing the order of two statements as a mapping from :class:`loopy.schedule.checker.StatementInstanceSet` to lexicographic time. + + Example usage:: + + # Make kernel -------------------------------------------------------- + knl = lp.make_kernel( + "{[i,j,k]: 0<=i *Identifiers*. - . 
+ + Example usage:: + + # Make kernel -------------------------------------------------------- + knl = lp.make_kernel( + "{[i,j,k]: 0<=i Date: Sun, 31 May 2020 22:33:59 -0500 Subject: [PATCH 065/460] add example schedule creation output to docstring examples --- loopy/schedule/checker/__init__.py | 16 ++++++++++++++++ loopy/schedule/checker/schedule.py | 16 ++++++++++++++++ 2 files changed, 32 insertions(+) diff --git a/loopy/schedule/checker/__init__.py b/loopy/schedule/checker/__init__.py index c454254f6..4489ca69b 100644 --- a/loopy/schedule/checker/__init__.py +++ b/loopy/schedule/checker/__init__.py @@ -91,6 +91,22 @@ def get_schedule_for_statement_pair( sched_a, sched_b = get_isl_maps_from_PairwiseScheduleBuilder( sched_builder_ab, knl) + print(sched_a) + print(sched_b) + + Example Output:: + + [pi, pj, pk] -> { + [_lp_linchk_statement = 0, i, j, k] -> + [_lp_linchk_l0 = 0, _lp_linchk_l1 = i, _lp_linchk_l2 = 0, + _lp_linchk_l3 = j, _lp_linchk_l4 = 0] : + 0 <= i < pi and 0 <= j < pj and 0 <= k < pk } + [pi, pj, pk] -> { + [_lp_linchk_statement = 1, i, j, k] -> + [_lp_linchk_l0 = 0, _lp_linchk_l1 = i, _lp_linchk_l2 = 1, + _lp_linchk_l3 = k, _lp_linchk_l4 = 0] : + 0 <= i < pi and 0 <= j < pj and 0 <= k < pk } + """ # {{{ Preprocess if not already preprocessed diff --git a/loopy/schedule/checker/schedule.py b/loopy/schedule/checker/schedule.py index d7cc7b454..fead079ac 100644 --- a/loopy/schedule/checker/schedule.py +++ b/loopy/schedule/checker/schedule.py @@ -190,6 +190,22 @@ class PairwiseScheduleBuilder(object): sched_a, sched_b = get_isl_maps_from_PairwiseScheduleBuilder( sched_builder_ab, knl) + print(sched_a) + print(sched_b) + + Example Output:: + + [pi, pj, pk] -> { + [_lp_linchk_statement = 0, i, j, k] -> + [_lp_linchk_l0 = 0, _lp_linchk_l1 = i, _lp_linchk_l2 = 0, + _lp_linchk_l3 = j, _lp_linchk_l4 = 0] : + 0 <= i < pi and 0 <= j < pj and 0 <= k < pk } + [pi, pj, pk] -> { + [_lp_linchk_statement = 1, i, j, k] -> + [_lp_linchk_l0 = 0, _lp_linchk_l1 = i, 
_lp_linchk_l2 = 1, + _lp_linchk_l3 = k, _lp_linchk_l4 = 0] : + 0 <= i < pi and 0 <= j < pj and 0 <= k < pk } + """ statement_var_name = "_lp_linchk_statement" From ba46ade4f5b002e72451d593162cac22cfa10553 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Mon, 1 Jun 2020 22:30:23 -0500 Subject: [PATCH 066/460] update identifier prefix for loopy.schedule.checker from _lp_sched_->_lp_linchk_ --- test/test_linearization_checker.py | 57 ++++++++++++++++-------------- 1 file changed, 30 insertions(+), 27 deletions(-) diff --git a/test/test_linearization_checker.py b/test/test_linearization_checker.py index 6841072ff..01e28f24a 100644 --- a/test/test_linearization_checker.py +++ b/test/test_linearization_checker.py @@ -486,29 +486,32 @@ def check_sio_for_insn_pair( assert sio_aligned == expected_sio expected_lex_order_map = isl.Map("{ " - "[_lp_sched_l0', _lp_sched_l1', _lp_sched_l2', _lp_sched_l3', _lp_sched_l4']" - " -> [_lp_sched_l0, _lp_sched_l1, _lp_sched_l2, _lp_sched_l3, _lp_sched_l4]" + "[_lp_linchk_l0', _lp_linchk_l1', _lp_linchk_l2', _lp_linchk_l3', " + "_lp_linchk_l4']" + " -> " + "[_lp_linchk_l0, _lp_linchk_l1, _lp_linchk_l2, _lp_linchk_l3, " + "_lp_linchk_l4]" ":" "(" - "_lp_sched_l0' < _lp_sched_l0 " + "_lp_linchk_l0' < _lp_linchk_l0 " ") or (" - "_lp_sched_l0'= _lp_sched_l0 and " - "_lp_sched_l1' < _lp_sched_l1 " + "_lp_linchk_l0'= _lp_linchk_l0 and " + "_lp_linchk_l1' < _lp_linchk_l1 " ") or (" - "_lp_sched_l0'= _lp_sched_l0 and " - "_lp_sched_l1'= _lp_sched_l1 and " - "_lp_sched_l2' < _lp_sched_l2 " + "_lp_linchk_l0'= _lp_linchk_l0 and " + "_lp_linchk_l1'= _lp_linchk_l1 and " + "_lp_linchk_l2' < _lp_linchk_l2 " ") or (" - "_lp_sched_l0'= _lp_sched_l0 and " - "_lp_sched_l1'= _lp_sched_l1 and " - "_lp_sched_l2'= _lp_sched_l2 and " - "_lp_sched_l3' < _lp_sched_l3 " + "_lp_linchk_l0'= _lp_linchk_l0 and " + "_lp_linchk_l1'= _lp_linchk_l1 and " + "_lp_linchk_l2'= _lp_linchk_l2 and " + "_lp_linchk_l3' < _lp_linchk_l3 " ") or (" - "_lp_sched_l0'= _lp_sched_l0 and " - 
"_lp_sched_l1'= _lp_sched_l1 and " - "_lp_sched_l2'= _lp_sched_l2 and " - "_lp_sched_l3'= _lp_sched_l3 and " - "_lp_sched_l4' < _lp_sched_l4" + "_lp_linchk_l0'= _lp_linchk_l0 and " + "_lp_linchk_l1'= _lp_linchk_l1 and " + "_lp_linchk_l2'= _lp_linchk_l2 and " + "_lp_linchk_l3'= _lp_linchk_l3 and " + "_lp_linchk_l4' < _lp_linchk_l4" ")" "}") @@ -522,9 +525,9 @@ def check_sio_for_insn_pair( expected_sio = isl.Map( "[pi, pj, pk] -> { " - "[_lp_sched_statement'=0, i', k'] -> [_lp_sched_statement=1, i, j]:" + "[_lp_linchk_statement'=0, i', k'] -> [_lp_linchk_statement=1, i, j]:" "0 <= i' < pi and 0 <= k' < pk and 0 <= j < pj and 0 <= i < pi and i > i'; " - "[_lp_sched_statement'=0, i', k'] -> [_lp_sched_statement=1, i=i', j]:" + "[_lp_linchk_statement'=0, i', k'] -> [_lp_linchk_statement=1, i=i', j]:" "0 <= i' < pi and 0 <= k' < pk and 0 <= j < pj " "}" ) @@ -539,9 +542,9 @@ def check_sio_for_insn_pair( expected_sio = isl.Map( "[pi, pj, pk] -> { " - "[_lp_sched_statement'=0, i', k'] -> [_lp_sched_statement=1, i, j]:" + "[_lp_linchk_statement'=0, i', k'] -> [_lp_linchk_statement=1, i, j]:" "0 <= i' < pi and 0 <= k' < pk and 0 <= j < pj and 0 <= i < pi and i > i'; " - "[_lp_sched_statement'=0, i', k'] -> [_lp_sched_statement=1, i=i', j]:" + "[_lp_linchk_statement'=0, i', k'] -> [_lp_linchk_statement=1, i=i', j]:" "0 <= i' < pi and 0 <= k' < pk and 0 <= j < pj " "}" ) @@ -556,7 +559,7 @@ def check_sio_for_insn_pair( expected_sio = isl.Map( "[pt, pi, pk] -> { " - "[_lp_sched_statement'=0, i', k'] -> [_lp_sched_statement=1, t]:" + "[_lp_linchk_statement'=0, i', k'] -> [_lp_linchk_statement=1, t]:" "0 <= i' < pi and 0 <= k' < pk and 0 <= t < pt " "}" ) @@ -571,11 +574,11 @@ def check_sio_for_insn_pair( expected_sio = isl.Map( "[pi, pj] -> { " - "[_lp_sched_statement'=0, i', j'] -> [_lp_sched_statement=1, i, j]:" + "[_lp_linchk_statement'=0, i', j'] -> [_lp_linchk_statement=1, i, j]:" "0 <= i' < pi and 0 <= j' < pj and i > i' and 0 <= i < pi and 0 <= j < pj; " - 
"[_lp_sched_statement'=0, i', j'] -> [_lp_sched_statement=1, i=i', j]:" + "[_lp_linchk_statement'=0, i', j'] -> [_lp_linchk_statement=1, i=i', j]:" "0 <= i' < pi and 0 <= j' < pj and j > j' and 0 <= j < pj; " - "[_lp_sched_statement'=0, i', j'] -> [_lp_sched_statement=1, i=i', j=j']:" + "[_lp_linchk_statement'=0, i', j'] -> [_lp_linchk_statement=1, i=i', j=j']:" "0 <= i' < pi and 0 <= j' < pj " "}" ) @@ -590,7 +593,7 @@ def check_sio_for_insn_pair( expected_sio = isl.Map( "[pt, pi, pj] -> { " - "[_lp_sched_statement'=0, i', j'] -> [_lp_sched_statement=1, t]:" + "[_lp_linchk_statement'=0, i', j'] -> [_lp_linchk_statement=1, t]:" "0 <= i' < pi and 0 <= j' < pj and 0 <= t < pt " "}" ) @@ -605,7 +608,7 @@ def check_sio_for_insn_pair( expected_sio = isl.Map( "[pt, pi, pj] -> { " - "[_lp_sched_statement'=0, i', j'] -> [_lp_sched_statement=1, t]:" + "[_lp_linchk_statement'=0, i', j'] -> [_lp_linchk_statement=1, t]:" "0 <= i' < pi and 0 <= j' < pj and 0 <= t < pt " "}" ) From a4c97513effa690b7c3a66f67caf54ed565490ad Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Tue, 2 Jun 2020 03:30:13 -0500 Subject: [PATCH 067/460] don't require islvars be passed to get_lex_order_constraint(); islvars default: create islvars from before_names+after_names --- .../checker/lexicographic_order_map.py | 27 ++++++++++--------- 1 file changed, 14 insertions(+), 13 deletions(-) diff --git a/loopy/schedule/checker/lexicographic_order_map.py b/loopy/schedule/checker/lexicographic_order_map.py index 17b6616ca..b547e1d94 100644 --- a/loopy/schedule/checker/lexicographic_order_map.py +++ b/loopy/schedule/checker/lexicographic_order_map.py @@ -63,17 +63,11 @@ def get_statement_ordering_map( sio, isl.dim_type.in_, before_marker) -def get_lex_order_constraint(islvars, before_names, after_names): +def get_lex_order_constraint(before_names, after_names, islvars=None): """Return a constraint represented as an :class:`islpy.Set` defining a 'happens before' relationship in a lexicographic ordering. 
- :arg islvars: A dictionary from variable names to :class:`islpy.PwAff` - instances that represent each of the variables - (islvars may be produced by `islpy.make_zero_and_vars`). The key - '0' is also include and represents a :class:`islpy.PwAff` zero constant. - This dictionary defines the space to be used for the set. - :arg before_names: A list of :class:`str` variable names representing the lexicographic space dimensions for a point in lexicographic time that occurs before. (see example below) @@ -82,6 +76,14 @@ def get_lex_order_constraint(islvars, before_names, after_names): the lexicographic space dimensions for a point in lexicographic time that occurs after. (see example below) + :arg islvars: A dictionary from variable names to :class:`islpy.PwAff` + instances that represent each of the variables + (islvars may be produced by `islpy.make_zero_and_vars`). The key + '0' is also include and represents a :class:`islpy.PwAff` zero constant. + This dictionary defines the space to be used for the set. If no + value is passed, the dictionary will be made using ``before_names`` + and ``after_names``. + :returns: An :class:`islpy.Set` representing a constraint that enforces a lexicographic ordering. 
E.g., if ``before_names = [i0', i1', i2']`` and ``after_names = [i0, i1, i2]``, return the set:: @@ -92,6 +94,10 @@ def get_lex_order_constraint(islvars, before_names, after_names): """ + # If no islvars passed, make them using the names provided + if islvars is None: + islvars = isl.make_zero_and_vars(before_names+after_names, []) + # Initialize constraint with i0' < i0 lex_order_constraint = islvars[before_names[0]].lt_set(islvars[after_names[0]]) @@ -164,12 +170,7 @@ def create_lex_order_map( assert len(before_names) == len(after_names) == n_dims dim_type = isl.dim_type - islvars = isl.make_zero_and_vars( - before_names+after_names, - []) - - lex_order_constraint = get_lex_order_constraint( - islvars, before_names, after_names) + lex_order_constraint = get_lex_order_constraint(before_names, after_names) lex_map = isl.Map.from_domain(lex_order_constraint) lex_map = lex_map.move_dims( From ed8c8fa252fc895c3e7ce254111227d981d1b94c Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Tue, 2 Jun 2020 04:16:23 -0500 Subject: [PATCH 068/460] delete stray print statements in test_statement_instance_ordering_creation() --- test/test_linearization_checker.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/test/test_linearization_checker.py b/test/test_linearization_checker.py index 01e28f24a..58884b443 100644 --- a/test/test_linearization_checker.py +++ b/test/test_linearization_checker.py @@ -475,14 +475,8 @@ def check_sio_for_insn_pair( sched_lex_order_map, ) - print(sio) - print(expected_sio) - sio_aligned = align_isl_maps_by_var_names(sio, expected_sio) - print(sio_aligned) - print(expected_sio) - assert sio_aligned == expected_sio expected_lex_order_map = isl.Map("{ " From 8761fb2734364b27ef52ae28f3760bed18b05ea5 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Tue, 2 Jun 2020 04:18:13 -0500 Subject: [PATCH 069/460] for consistency between runs, sort var names extracted from isl sets --- loopy/schedule/checker/utils.py | 4 +++- 1 file changed, 3 insertions(+), 1 
deletion(-) diff --git a/loopy/schedule/checker/utils.py b/loopy/schedule/checker/utils.py index 0728e9686..eb7707f67 100644 --- a/loopy/schedule/checker/utils.py +++ b/loopy/schedule/checker/utils.py @@ -156,7 +156,9 @@ def list_var_names_in_isl_sets( inames = set() for isl_set in isl_sets: inames.update(isl_set.get_var_names(set_dim)) - return list(inames) + + # sorting is not necessary, but keeps results consistent between runs + return sorted(list(inames)) def create_symbolic_isl_map_from_tuples( From 055be9744ec3a0377d3c142b0e89cb3f3a237e20 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Tue, 2 Jun 2020 12:40:21 -0500 Subject: [PATCH 070/460] fix docstring indentation --- loopy/schedule/checker/schedule.py | 64 +++++++++++++++--------------- 1 file changed, 32 insertions(+), 32 deletions(-) diff --git a/loopy/schedule/checker/schedule.py b/loopy/schedule/checker/schedule.py index fead079ac..459ab4fcb 100644 --- a/loopy/schedule/checker/schedule.py +++ b/loopy/schedule/checker/schedule.py @@ -28,17 +28,17 @@ class StatementRef(object): .. attribute:: insn_id - A :class:`str` specifying the :mod:`loopy` instruction id - for this statement. + A :class:`str` specifying the :mod:`loopy` instruction id + for this statement. .. attribute:: int_id - A :class:`int` uniquely identifying the statement within a - :class:`PairwiseScheduleBuilder`. A :class:`PairwiseScheduleBuilder` - builds a mapping from points in a space of statement instances to - points in a lexicographic ordering. The `statement` dimension of a - point in the statement instance space representing an instance of - this statement is assigned this value (`int_id`). + A :class:`int` uniquely identifying the statement within a + :class:`PairwiseScheduleBuilder`. A :class:`PairwiseScheduleBuilder` + builds a mapping from points in a space of statement instances to + points in a lexicographic ordering. 
The `statement` dimension of a + point in the statement instance space representing an instance of + this statement is assigned this value (`int_id`). """ @@ -81,13 +81,13 @@ class StatementInstanceSet(object): .. attribute:: stmt_ref - A :class:`StatementRef`. + A :class:`StatementRef`. .. attribute:: lex_points - A list containing one value for each dimension in a lexicographic - ordering. These values describe the ordering of the statements, - and may be :class:`str` :mod:`loopy` inames or :class:`int`. + A list containing one value for each dimension in a lexicographic + ordering. These values describe the ordering of the statements, + and may be :class:`str` :mod:`loopy` inames or :class:`int`. """ @@ -111,35 +111,35 @@ class PairwiseScheduleBuilder(object): .. attribute:: stmt_instance_before - A :class:`StatementInstanceSet` whose ordering relative - to `stmt_instance_after is described by PairwiseScheduleBuilder. This - is achieved by mapping the statement instances in both sets to points - in a single lexicographic ordering. Points in lexicographic ordering - are represented as a list of :class:`int` or as :class:`str` - :mod:`loopy` inames. + A :class:`StatementInstanceSet` whose ordering relative + to `stmt_instance_after is described by PairwiseScheduleBuilder. This + is achieved by mapping the statement instances in both sets to points + in a single lexicographic ordering. Points in lexicographic ordering + are represented as a list of :class:`int` or as :class:`str` + :mod:`loopy` inames. .. attribute:: stmt_instance_after - A :class:`StatementInstanceSet` whose ordering relative - to `stmt_instance_before is described by PairwiseScheduleBuilder. This - is achieved by mapping the statement instances in both sets to points - in a single lexicographic ordering. Points in lexicographic ordering - are represented as a list of :class:`int` or as :class:`str` - :mod:`loopy` inames. 
+ A :class:`StatementInstanceSet` whose ordering relative + to `stmt_instance_before is described by PairwiseScheduleBuilder. This + is achieved by mapping the statement instances in both sets to points + in a single lexicographic ordering. Points in lexicographic ordering + are represented as a list of :class:`int` or as :class:`str` + :mod:`loopy` inames. .. attribute:: statement_var_name - A :class:`str` specifying the name of the isl variable used - to represent the unique :class:`int` statement id. + A :class:`str` specifying the name of the isl variable used + to represent the unique :class:`int` statement id. .. attribute:: lex_var_prefix - A :class:`str` specifying the prefix to be used for the variables - representing the dimensions in the lexicographic ordering. E.g., - a prefix of "_lp_linchk_lex" might yield variables "_lp_linchk_lex0", - "_lp_linchk_lex1", "_lp_linchk_lex2". Note the identifier prefix - policies described in the documentation under - *Loopy's Model of a Kernel* -> *Identifiers*. + A :class:`str` specifying the prefix to be used for the variables + representing the dimensions in the lexicographic ordering. E.g., + a prefix of "_lp_linchk_lex" might yield variables "_lp_linchk_lex0", + "_lp_linchk_lex1", "_lp_linchk_lex2". Note the identifier prefix + policies described in the documentation under + *Loopy's Model of a Kernel* -> *Identifiers*. 
Example usage:: From 6b0b01bd10aeffc7afbf9f0050b3e04e84710577 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Tue, 2 Jun 2020 12:49:20 -0500 Subject: [PATCH 071/460] remove redundant usage example and note that a PairwiseScheduleBuilder should be created using get_schedule_for_statement_pair --- loopy/schedule/checker/__init__.py | 2 +- loopy/schedule/checker/schedule.py | 68 +----------------------------- 2 files changed, 3 insertions(+), 67 deletions(-) diff --git a/loopy/schedule/checker/__init__.py b/loopy/schedule/checker/__init__.py index 4489ca69b..8c4b06b0d 100644 --- a/loopy/schedule/checker/__init__.py +++ b/loopy/schedule/checker/__init__.py @@ -71,7 +71,7 @@ def get_schedule_for_statement_pair( # Get a linearization knl = lp.get_one_linearized_kernel(lp.preprocess_kernel(knl)) - # Get a pairwise schedule* ------------------------------------------- + # Get a pairwise schedule -------------------------------------------- from loopy.schedule.checker import ( get_schedule_for_statement_pair, diff --git a/loopy/schedule/checker/schedule.py b/loopy/schedule/checker/schedule.py index 459ab4fcb..12547d01c 100644 --- a/loopy/schedule/checker/schedule.py +++ b/loopy/schedule/checker/schedule.py @@ -107,7 +107,8 @@ class PairwiseScheduleBuilder(object): """Given a pair of statements in a linearized kernel, PairwiseScheduleBuilder determines the (relative) order in which the instances are executed, by creating a mapping from statement instances to points in a single - lexicographic ordering. + lexicographic ordering. To create a PairwiseScheduleBuilder, use + :func:`loopy.schedule.checker.get_schedule_for_statement_pair`. .. attribute:: stmt_instance_before @@ -141,71 +142,6 @@ class PairwiseScheduleBuilder(object): policies described in the documentation under *Loopy's Model of a Kernel* -> *Identifiers*. 
- Example usage:: - - # Make kernel -------------------------------------------------------- - knl = lp.make_kernel( - "{[i,j,k]: 0<=i { - [_lp_linchk_statement = 0, i, j, k] -> - [_lp_linchk_l0 = 0, _lp_linchk_l1 = i, _lp_linchk_l2 = 0, - _lp_linchk_l3 = j, _lp_linchk_l4 = 0] : - 0 <= i < pi and 0 <= j < pj and 0 <= k < pk } - [pi, pj, pk] -> { - [_lp_linchk_statement = 1, i, j, k] -> - [_lp_linchk_l0 = 0, _lp_linchk_l1 = i, _lp_linchk_l2 = 1, - _lp_linchk_l3 = k, _lp_linchk_l4 = 0] : - 0 <= i < pi and 0 <= j < pj and 0 <= k < pk } - """ statement_var_name = "_lp_linchk_statement" From bdef0a639f8a4dd9a6ceb9d58c74650437861831 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Tue, 2 Jun 2020 12:55:56 -0500 Subject: [PATCH 072/460] rephrase doc comment about using get_schedule_for_statement_pair to create a PairwiseScheduleBuilder --- loopy/schedule/checker/schedule.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/loopy/schedule/checker/schedule.py b/loopy/schedule/checker/schedule.py index 12547d01c..756b85628 100644 --- a/loopy/schedule/checker/schedule.py +++ b/loopy/schedule/checker/schedule.py @@ -107,8 +107,9 @@ class PairwiseScheduleBuilder(object): """Given a pair of statements in a linearized kernel, PairwiseScheduleBuilder determines the (relative) order in which the instances are executed, by creating a mapping from statement instances to points in a single - lexicographic ordering. To create a PairwiseScheduleBuilder, use - :func:`loopy.schedule.checker.get_schedule_for_statement_pair`. + lexicographic ordering. The function + :func:`loopy.schedule.checker.get_schedule_for_statement_pair` is the + preferred method of creating a PairwiseScheduleBuilder. .. 
attribute:: stmt_instance_before From 66283621373e8706b17795cb40e949dc2457c6a7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andreas=20Kl=C3=B6ckner?= Date: Wed, 3 Jun 2020 01:03:55 +0200 Subject: [PATCH 073/460] Some styling fixes for pairwise schedule representation --- doc/ref_kernel.rst | 2 ++ loopy/schedule/checker/__init__.py | 28 +++++++++++++++++----------- loopy/schedule/checker/schedule.py | 10 +++------- 3 files changed, 22 insertions(+), 18 deletions(-) diff --git a/doc/ref_kernel.rst b/doc/ref_kernel.rst index af35221ad..1c4d8971d 100644 --- a/doc/ref_kernel.rst +++ b/doc/ref_kernel.rst @@ -154,6 +154,8 @@ Tag Meaning Identifiers ----------- +.. _reserved-identifiers: + Reserved Identifiers ^^^^^^^^^^^^^^^^^^^^ diff --git a/loopy/schedule/checker/__init__.py b/loopy/schedule/checker/__init__.py index 8c4b06b0d..64d3d7c66 100644 --- a/loopy/schedule/checker/__init__.py +++ b/loopy/schedule/checker/__init__.py @@ -21,7 +21,7 @@ """ -# {{{ Create PairwiseScheduleBuilder for statement pair +# {{{ create PairwiseScheduleBuilder for statement pair def get_schedule_for_statement_pair( knl, @@ -30,9 +30,9 @@ def get_schedule_for_statement_pair( insn_id_after, ): """Create a :class:`loopy.schedule.checker.schedule.PairwiseScheduleBuilder` - representing the order of two statements as a mapping from - :class:`loopy.schedule.checker.StatementInstanceSet` - to lexicographic time. + representing the order of two statements as a mapping from + :class:`loopy.schedule.checker.StatementInstanceSet` + to lexicographic time. :arg knl: A :class:`loopy.kernel.LoopKernel` containing the linearization items that will be used to create a schedule. 
@@ -109,12 +109,15 @@ def get_schedule_for_statement_pair( """ - # {{{ Preprocess if not already preprocessed + # {{{ preprocess if not already preprocessed + from loopy import preprocess_kernel preproc_knl = preprocess_kernel(knl) + # }}} - # {{{ Find any EnterLoop inames that are tagged as concurrent + # {{{ find any EnterLoop inames that are tagged as concurrent + # so that PairwiseScheduleBuilder knows to ignore them # (In the future, this shouldn't be necessary because there # won't be any inames with ConcurrentTags in EnterLoop linearization items. @@ -132,9 +135,11 @@ def get_schedule_for_statement_pair( "get_schedule_for_statement_pair encountered EnterLoop for inames %s " "with ConcurrentTag(s) in linearization for kernel %s. " "Ignoring these loops." % (conc_loop_inames, preproc_knl.name)) + # }}} # {{{ Create PairwiseScheduleBuilder: mapping of {statement instance: lex point} + # include only instructions involved in this dependency from loopy.schedule.checker.schedule import PairwiseScheduleBuilder return PairwiseScheduleBuilder( @@ -143,17 +148,18 @@ def get_schedule_for_statement_pair( insn_id_after, loops_to_ignore=conc_loop_inames, ) + # }}} # }}} -# {{{ Get isl map pair from PairwiseScheduleBuilder +# {{{ get_isl_maps_from_PairwiseScheduleBuilder def get_isl_maps_from_PairwiseScheduleBuilder(sched_builder, knl): - """Create a pair of :class:`islpy.Map`s representing a - sub-schedule as two mappings from statement instances to lexicographic - time, one for the dependee statement and one for the depender. + """Create a pair of :class:`islpy.Map`\ s representing a + sub-schedule as two mappings from statement instances to lexicographic + time, one for the dependee statement and one for the depender. 
:arg sched_builder: A :class:`loopy.schedule.checker.schedule.PairwiseScheduleBuilder` @@ -164,7 +170,7 @@ def get_isl_maps_from_PairwiseScheduleBuilder(sched_builder, knl): :arg knl: A :class:`loopy.kernel.LoopKernel` containing the linearization items that will be used to create a schedule. - :returns: A two-tuple containing two :class:`islpy.Map`s + :returns: A two-tuple containing two :class:`islpy.Map`\ s representing the schedule as two mappings from statement instances to lexicographic time, one for the dependee and one for the depender. diff --git a/loopy/schedule/checker/schedule.py b/loopy/schedule/checker/schedule.py index 756b85628..6dc091f9e 100644 --- a/loopy/schedule/checker/schedule.py +++ b/loopy/schedule/checker/schedule.py @@ -38,7 +38,7 @@ class StatementRef(object): builds a mapping from points in a space of statement instances to points in a lexicographic ordering. The `statement` dimension of a point in the statement instance space representing an instance of - this statement is assigned this value (`int_id`). + this statement is assigned this value. """ @@ -88,7 +88,6 @@ class StatementInstanceSet(object): A list containing one value for each dimension in a lexicographic ordering. These values describe the ordering of the statements, and may be :class:`str` :mod:`loopy` inames or :class:`int`. - """ def __init__( @@ -131,7 +130,7 @@ class PairwiseScheduleBuilder(object): .. attribute:: statement_var_name - A :class:`str` specifying the name of the isl variable used + A :class:`str` specifying the name of the variable used to represent the unique :class:`int` statement id. .. attribute:: lex_var_prefix @@ -139,10 +138,7 @@ class PairwiseScheduleBuilder(object): A :class:`str` specifying the prefix to be used for the variables representing the dimensions in the lexicographic ordering. E.g., a prefix of "_lp_linchk_lex" might yield variables "_lp_linchk_lex0", - "_lp_linchk_lex1", "_lp_linchk_lex2". 
Note the identifier prefix - policies described in the documentation under - *Loopy's Model of a Kernel* -> *Identifiers*. - + "_lp_linchk_lex1", "_lp_linchk_lex2". Cf. :ref:`reserved-identifiers`. """ statement_var_name = "_lp_linchk_statement" From 0bb444450e327a11aae08ce9e26c441e0d706057 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andreas=20Kl=C3=B6ckner?= Date: Wed, 3 Jun 2020 01:32:19 +0200 Subject: [PATCH 074/460] Placate flake8 --- loopy/schedule/checker/__init__.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/loopy/schedule/checker/__init__.py b/loopy/schedule/checker/__init__.py index 64d3d7c66..790088d5c 100644 --- a/loopy/schedule/checker/__init__.py +++ b/loopy/schedule/checker/__init__.py @@ -117,7 +117,7 @@ def get_schedule_for_statement_pair( # }}} # {{{ find any EnterLoop inames that are tagged as concurrent - + # so that PairwiseScheduleBuilder knows to ignore them # (In the future, this shouldn't be necessary because there # won't be any inames with ConcurrentTags in EnterLoop linearization items. @@ -157,7 +157,7 @@ def get_schedule_for_statement_pair( # {{{ get_isl_maps_from_PairwiseScheduleBuilder def get_isl_maps_from_PairwiseScheduleBuilder(sched_builder, knl): - """Create a pair of :class:`islpy.Map`\ s representing a + r"""Create a pair of :class:`islpy.Map`\ s representing a sub-schedule as two mappings from statement instances to lexicographic time, one for the dependee statement and one for the depender. 
From 5dafc30ab3e7f189f80980d21064b6bc6dff2c5a Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Fri, 5 Jun 2020 06:27:08 -0500 Subject: [PATCH 075/460] remove get_isl_maps_from_PairwiseScheduleBuilder(); instead get j inames domains inside PairwiseScheduleBuilder.create_isl_maps() and just call PairwiseScheduleBuilder.create_isl_maps directly --- loopy/schedule/checker/__init__.py | 50 +++--------------------------- loopy/schedule/checker/schedule.py | 29 +++++++++-------- test/test_linearization_checker.py | 19 ++++-------- 3 files changed, 24 insertions(+), 74 deletions(-) diff --git a/loopy/schedule/checker/__init__.py b/loopy/schedule/checker/__init__.py index 8c4b06b0d..b287ca4b7 100644 --- a/loopy/schedule/checker/__init__.py +++ b/loopy/schedule/checker/__init__.py @@ -57,7 +57,7 @@ def get_schedule_for_statement_pair( Example usage:: - # Make kernel -------------------------------------------------------- + # Make kernel ------------------------------------------------------------ knl = lp.make_kernel( "{[i,j,k]: 0<=i used in statement domain>) -> @@ -389,11 +390,9 @@ def _get_isl_map_for_stmt_inst( map_before = _get_isl_map_for_stmt_inst( self.stmt_instance_before, - dom_before, dom_inames_ordered_before) map_after = _get_isl_map_for_stmt_inst( self.stmt_instance_after, - dom_after, dom_inames_ordered_after) return (map_before, map_after) diff --git a/test/test_linearization_checker.py b/test/test_linearization_checker.py index 396fccf4f..02ac08592 100644 --- a/test/test_linearization_checker.py +++ b/test/test_linearization_checker.py @@ -50,7 +50,6 @@ def test_lexschedule_and_islmap_creation(): import islpy as isl from loopy.schedule.checker import ( get_schedule_for_statement_pair, - get_isl_maps_from_PairwiseScheduleBuilder, ) from loopy.schedule.checker.utils import ( align_isl_maps_by_var_names, @@ -138,8 +137,7 @@ def test_lexschedule_and_islmap_creation(): # Get two isl maps from the PairwiseScheduleBuilder - isl_sched_before, isl_sched_after = 
get_isl_maps_from_PairwiseScheduleBuilder( - sched_ab, knl) + isl_sched_before, isl_sched_after = sched_ab.create_isl_maps(knl) # Create expected maps, align, compare @@ -174,8 +172,7 @@ def test_lexschedule_and_islmap_creation(): # Get two isl maps from the PairwiseScheduleBuilder - isl_sched_before, isl_sched_after = get_isl_maps_from_PairwiseScheduleBuilder( - sched_ac, knl) + isl_sched_before, isl_sched_after = sched_ac.create_isl_maps(knl) # Create expected maps, align, compare @@ -213,8 +210,7 @@ def perform_insn_ad_checks_with(sid_a, sid_d): # Get two isl maps from the PairwiseScheduleBuilder - isl_sched_before, isl_sched_after = \ - get_isl_maps_from_PairwiseScheduleBuilder(sched_ad, knl) + isl_sched_before, isl_sched_after = sched_ad.create_isl_maps(knl) # Create expected maps, align, compare @@ -259,8 +255,7 @@ def perform_insn_bc_checks_with(sid_b, sid_c): # Get two isl maps from the PairwiseScheduleBuilder - isl_sched_before, isl_sched_after = \ - get_isl_maps_from_PairwiseScheduleBuilder(sched_bc, knl) + isl_sched_before, isl_sched_after = sched_bc.create_isl_maps(knl) # Create expected maps, align, compare @@ -305,8 +300,7 @@ def perform_insn_bd_checks_with(sid_b, sid_d): # Get two isl maps from the PairwiseScheduleBuilder - isl_sched_before, isl_sched_after = \ - get_isl_maps_from_PairwiseScheduleBuilder(sched_bd, knl) + isl_sched_before, isl_sched_after = sched_bd.create_isl_maps(knl) # Create expected maps, align, compare @@ -351,8 +345,7 @@ def perform_insn_cd_checks_with(sid_c, sid_d): # Get two isl maps from the PairwiseScheduleBuilder - isl_sched_before, isl_sched_after = \ - get_isl_maps_from_PairwiseScheduleBuilder(sched_cd, knl) + isl_sched_before, isl_sched_after = sched_cd.create_isl_maps(knl) # Create expected maps, align, compare From bb2ebcf84438be2678be35a3fb3cea2e543783ad Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Fri, 5 Jun 2020 09:09:09 -0500 Subject: [PATCH 076/460] print name of class in PairwiseScheduleBuilder.__str__() --- 
loopy/schedule/checker/schedule.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/loopy/schedule/checker/schedule.py b/loopy/schedule/checker/schedule.py index 37e5a5f4c..882bcc73f 100644 --- a/loopy/schedule/checker/schedule.py +++ b/loopy/schedule/checker/schedule.py @@ -405,6 +405,7 @@ def stringify_sched_stmt_instance(stmt_inst): stmt_inst.stmt_ref.int_id, stmt_inst.lex_points) - return "Before: %s\nAfter: %s" % ( + return "%s(\nBefore: %s\nAfter: %s\n)" % ( + self.__class__.__name__, stringify_sched_stmt_instance(self.stmt_instance_before), stringify_sched_stmt_instance(self.stmt_instance_after)) From a87f101a6b6339319eb9129a040304792d821389 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Fri, 5 Jun 2020 09:11:15 -0500 Subject: [PATCH 077/460] rename pose->idx --- loopy/schedule/checker/utils.py | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/loopy/schedule/checker/utils.py b/loopy/schedule/checker/utils.py index eb7707f67..af9e4aaef 100644 --- a/loopy/schedule/checker/utils.py +++ b/loopy/schedule/checker/utils.py @@ -35,12 +35,12 @@ def get_islvars_from_space(space): return isl.make_zero_and_vars(in_names+out_names, param_names) -def add_dims_to_isl_set(isl_set, dim_type, names, new_pose_start): +def add_dims_to_isl_set(isl_set, dim_type, names, new_idx_start): new_set = isl_set.insert_dims( - dim_type, new_pose_start, len(names) - ).set_dim_name(dim_type, new_pose_start, names[0]) + dim_type, new_idx_start, len(names) + ).set_dim_name(dim_type, new_idx_start, names[0]) for i, name in enumerate(names[1:]): - new_set = new_set.set_dim_name(dim_type, new_pose_start+1+i, name) + new_set = new_set.set_dim_name(dim_type, new_idx_start+1+i, name) return new_set @@ -90,27 +90,27 @@ def reorder_dims_by_name( other_dim_len = len(isl_set.get_var_names(other_dim_type)) new_set = isl_set.copy() - for desired_pose, name in enumerate(desired_dims_ordered): + for desired_idx, name in 
enumerate(desired_dims_ordered): # if iname doesn't exist in set, add dim: if name not in new_set.get_var_names(dim_type): if add_missing: # insert missing dim in correct location new_set = new_set.insert_dims( - dim_type, desired_pose, 1 + dim_type, desired_idx, 1 ).set_dim_name( - dim_type, desired_pose, name) + dim_type, desired_idx, name) else: # iname exists in set - current_pose = new_set.find_dim_by_name(dim_type, name) - if current_pose != desired_pose: - # move_dims(dst_type, dst_pose, src_type, src_pose, n) + current_idx = new_set.find_dim_by_name(dim_type, name) + if current_idx != desired_idx: + # move_dims(dst_type, dst_idx, src_type, src_idx, n) # first move to other dim because isl is stupid new_set = new_set.move_dims( - other_dim_type, other_dim_len, dim_type, current_pose, 1) + other_dim_type, other_dim_len, dim_type, current_idx, 1) # now move it where we actually want it new_set = new_set.move_dims( - dim_type, desired_pose, other_dim_type, other_dim_len, 1) + dim_type, desired_idx, other_dim_type, other_dim_len, 1) return new_set From 57669bc184019b3d8ee86f0600f2242aeb657504 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Fri, 5 Jun 2020 09:55:41 -0500 Subject: [PATCH 078/460] change __str__ to __repr__ in StatementInstanceSet() and include class name in string --- loopy/schedule/checker/schedule.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/loopy/schedule/checker/schedule.py b/loopy/schedule/checker/schedule.py index 882bcc73f..dbb2a7455 100644 --- a/loopy/schedule/checker/schedule.py +++ b/loopy/schedule/checker/schedule.py @@ -98,8 +98,9 @@ def __init__( self.stmt_ref = stmt_ref self.lex_points = lex_points - def __str__(self): - return "{%s, %s}" % (self.stmt_ref, self.lex_points) + def __repr__(self): + return "%s(%s, %s)" % ( + self.__class__.__name__, self.stmt_ref, self.lex_points) class PairwiseScheduleBuilder(object): From e3a4db902e49ee92b228219ac9cdce62a324137c Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: 
Sun, 7 Jun 2020 15:24:42 -0500 Subject: [PATCH 079/460] rename _get_isl_map_for_stmt_inst()->_get_map_for_stmt_inst() --- loopy/schedule/checker/schedule.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/loopy/schedule/checker/schedule.py b/loopy/schedule/checker/schedule.py index dbb2a7455..dc55a3bcd 100644 --- a/loopy/schedule/checker/schedule.py +++ b/loopy/schedule/checker/schedule.py @@ -347,7 +347,7 @@ def create_isl_maps( params_sched = [] out_names_sched = self.get_lex_var_names() - def _get_isl_map_for_stmt_inst(stmt_inst, dom_inames_ordered): + def _get_map_for_stmt_inst(stmt_inst, dom_inames_ordered): # Get inames domain for statement instance (a BasicSet) dom = knl.get_inames_domain( @@ -385,10 +385,10 @@ def _get_isl_map_for_stmt_inst(stmt_inst, dom_inames_ordered): space=sched_space, ) - map_before = _get_isl_map_for_stmt_inst( + map_before = _get_map_for_stmt_inst( self.stmt_instance_before, dom_inames_ordered_before) - map_after = _get_isl_map_for_stmt_inst( + map_after = _get_map_for_stmt_inst( self.stmt_instance_after, dom_inames_ordered_after) From 0e317270aeebd9e154c4e307d27cdb7b4375a07d Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Sun, 7 Jun 2020 15:30:34 -0500 Subject: [PATCH 080/460] rename create_isl_maps()->build_maps() --- loopy/schedule/checker/__init__.py | 2 +- loopy/schedule/checker/schedule.py | 2 +- test/test_linearization_checker.py | 12 ++++++------ 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/loopy/schedule/checker/__init__.py b/loopy/schedule/checker/__init__.py index 9698b95fc..6fd280a45 100644 --- a/loopy/schedule/checker/__init__.py +++ b/loopy/schedule/checker/__init__.py @@ -85,7 +85,7 @@ def get_schedule_for_statement_pair( # Get two isl maps from the PairwiseScheduleBuilder ---------------------- - sched_a, sched_b = sched_builder_ab.create_isl_maps(knl) + sched_a, sched_b = sched_builder_ab.build_maps(knl) print(sched_a) print(sched_b) diff --git 
a/loopy/schedule/checker/schedule.py b/loopy/schedule/checker/schedule.py index dc55a3bcd..c9cc5981e 100644 --- a/loopy/schedule/checker/schedule.py +++ b/loopy/schedule/checker/schedule.py @@ -303,7 +303,7 @@ def _pad_lex_tuple_with_zeros(stmt_inst, length): self.stmt_instance_after = _pad_lex_tuple_with_zeros( self.stmt_instance_after, max_lex_dim) - def create_isl_maps( + def build_maps( self, knl, dom_inames_ordered_before=None, diff --git a/test/test_linearization_checker.py b/test/test_linearization_checker.py index 02ac08592..16c56274d 100644 --- a/test/test_linearization_checker.py +++ b/test/test_linearization_checker.py @@ -137,7 +137,7 @@ def test_lexschedule_and_islmap_creation(): # Get two isl maps from the PairwiseScheduleBuilder - isl_sched_before, isl_sched_after = sched_ab.create_isl_maps(knl) + isl_sched_before, isl_sched_after = sched_ab.build_maps(knl) # Create expected maps, align, compare @@ -172,7 +172,7 @@ def test_lexschedule_and_islmap_creation(): # Get two isl maps from the PairwiseScheduleBuilder - isl_sched_before, isl_sched_after = sched_ac.create_isl_maps(knl) + isl_sched_before, isl_sched_after = sched_ac.build_maps(knl) # Create expected maps, align, compare @@ -210,7 +210,7 @@ def perform_insn_ad_checks_with(sid_a, sid_d): # Get two isl maps from the PairwiseScheduleBuilder - isl_sched_before, isl_sched_after = sched_ad.create_isl_maps(knl) + isl_sched_before, isl_sched_after = sched_ad.build_maps(knl) # Create expected maps, align, compare @@ -255,7 +255,7 @@ def perform_insn_bc_checks_with(sid_b, sid_c): # Get two isl maps from the PairwiseScheduleBuilder - isl_sched_before, isl_sched_after = sched_bc.create_isl_maps(knl) + isl_sched_before, isl_sched_after = sched_bc.build_maps(knl) # Create expected maps, align, compare @@ -300,7 +300,7 @@ def perform_insn_bd_checks_with(sid_b, sid_d): # Get two isl maps from the PairwiseScheduleBuilder - isl_sched_before, isl_sched_after = sched_bd.create_isl_maps(knl) + isl_sched_before, 
isl_sched_after = sched_bd.build_maps(knl) # Create expected maps, align, compare @@ -345,7 +345,7 @@ def perform_insn_cd_checks_with(sid_c, sid_d): # Get two isl maps from the PairwiseScheduleBuilder - isl_sched_before, isl_sched_after = sched_cd.create_isl_maps(knl) + isl_sched_before, isl_sched_after = sched_cd.build_maps(knl) # Create expected maps, align, compare From 3e93b622192980ecd6c1a2b46391a25497e60127 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Sun, 7 Jun 2020 15:45:21 -0500 Subject: [PATCH 081/460] rename isl_map->map_obj --- loopy/schedule/checker/utils.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/loopy/schedule/checker/utils.py b/loopy/schedule/checker/utils.py index af9e4aaef..5c8ae05f2 100644 --- a/loopy/schedule/checker/utils.py +++ b/loopy/schedule/checker/utils.py @@ -23,8 +23,8 @@ import islpy as isl -def prettier_map_string(isl_map): - return str(isl_map +def prettier_map_string(map_obj): + return str(map_obj ).replace("{ ", "{\n").replace(" }", "\n}").replace("; ", ";\n") From d345c21fc0b6cc4c6c4de3b403c1565f4f35ec17 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Sun, 7 Jun 2020 16:00:32 -0500 Subject: [PATCH 082/460] update basedon func change: get_isl_maps_from_PairwiseScheduleBuilder(sched_builder, knl)->sched_builder.build_maps(knl) --- test/test_linearization_checker.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/test/test_linearization_checker.py b/test/test_linearization_checker.py index 9511da729..15d022144 100644 --- a/test/test_linearization_checker.py +++ b/test/test_linearization_checker.py @@ -390,7 +390,6 @@ def test_statement_instance_ordering_creation(): import islpy as isl from loopy.schedule.checker import ( get_schedule_for_statement_pair, - get_isl_maps_from_PairwiseScheduleBuilder, ) from loopy.schedule.checker.utils import ( align_isl_maps_by_var_names, @@ -452,8 +451,7 @@ def check_sio_for_insn_pair( ) # Get two isl maps from the PairwiseScheduleBuilder - 
isl_sched_map_before, isl_sched_map_after = \ - get_isl_maps_from_PairwiseScheduleBuilder(sched_builder, knl) + isl_sched_map_before, isl_sched_map_after = sched_builder.build_maps(knl) # get map representing lexicographic ordering sched_lex_order_map = sched_builder.get_lex_order_map_for_sched_space() From ee4d98fbef071df03daaa6c99f36e3b10aae1881 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Mon, 8 Jun 2020 14:13:15 -0500 Subject: [PATCH 083/460] renamed create_symbolic_isl_map_from_tuples()->create_symbolic_map_from_tuples() --- loopy/schedule/checker/schedule.py | 4 ++-- loopy/schedule/checker/utils.py | 3 +-- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/loopy/schedule/checker/schedule.py b/loopy/schedule/checker/schedule.py index c9cc5981e..d63bb9c41 100644 --- a/loopy/schedule/checker/schedule.py +++ b/loopy/schedule/checker/schedule.py @@ -340,7 +340,7 @@ def build_maps( from loopy.schedule.checker.utils import ( list_var_names_in_isl_sets, get_isl_space, - create_symbolic_isl_map_from_tuples, + create_symbolic_map_from_tuples, add_dims_to_isl_set, ) @@ -380,7 +380,7 @@ def _get_map_for_stmt_inst(stmt_inst, dom_inames_ordered): )] # create isl map - return create_symbolic_isl_map_from_tuples( + return create_symbolic_map_from_tuples( tuple_pairs_with_domains=zip(tuple_pair, dom_to_intersect), space=sched_space, ) diff --git a/loopy/schedule/checker/utils.py b/loopy/schedule/checker/utils.py index 5c8ae05f2..8e65d9905 100644 --- a/loopy/schedule/checker/utils.py +++ b/loopy/schedule/checker/utils.py @@ -161,7 +161,7 @@ def list_var_names_in_isl_sets( return sorted(list(inames)) -def create_symbolic_isl_map_from_tuples( +def create_symbolic_map_from_tuples( tuple_pairs_with_domains, space, ): @@ -191,7 +191,6 @@ def create_symbolic_isl_map_from_tuples( dim_type = isl.dim_type - #param_names = space.get_var_names(isl.dim_type.param) space_out_names = space.get_var_names(dim_type.out) space_in_names = space.get_var_names(isl.dim_type.in_) From 
6e2becf4da7f0567ab801bb012ee4dd9a48055bc Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Mon, 8 Jun 2020 14:29:40 -0500 Subject: [PATCH 084/460] remove 'isl' from more locally used var names --- loopy/schedule/checker/__init__.py | 2 +- loopy/schedule/checker/schedule.py | 6 +- test/test_linearization_checker.py | 122 ++++++++++++++--------------- 3 files changed, 65 insertions(+), 65 deletions(-) diff --git a/loopy/schedule/checker/__init__.py b/loopy/schedule/checker/__init__.py index 6fd280a45..716a0cb58 100644 --- a/loopy/schedule/checker/__init__.py +++ b/loopy/schedule/checker/__init__.py @@ -83,7 +83,7 @@ def get_schedule_for_statement_pair( "insn_b", ) - # Get two isl maps from the PairwiseScheduleBuilder ---------------------- + # Get two maps from the PairwiseScheduleBuilder -------------------------- sched_a, sched_b = sched_builder_ab.build_maps(knl) diff --git a/loopy/schedule/checker/schedule.py b/loopy/schedule/checker/schedule.py index d63bb9c41..8c503b191 100644 --- a/loopy/schedule/checker/schedule.py +++ b/loopy/schedule/checker/schedule.py @@ -353,7 +353,7 @@ def _get_map_for_stmt_inst(stmt_inst, dom_inames_ordered): dom = knl.get_inames_domain( knl.id_to_insn[stmt_inst.stmt_ref.insn_id].within_inames) - # create an isl space + # create space (an isl space in current implementation) # {('statement', used in statement domain>) -> # (lexicographic ordering dims)} if dom_inames_ordered is None: @@ -370,7 +370,7 @@ def _get_map_for_stmt_inst(stmt_inst, dom_inames_ordered): add_dims_to_isl_set( dom, isl.dim_type.set, [self.statement_var_name], 0), ] - # Each isl map representing the schedule will map + # Each map representing the schedule will map # statement instances -> lex time. # Right now, statement instance tuples consist of single int. # Add all inames from domains to each map domain tuple. 
@@ -379,7 +379,7 @@ def _get_map_for_stmt_inst(stmt_inst, dom_inames_ordered): stmt_inst.lex_points )] - # create isl map + # create map return create_symbolic_map_from_tuples( tuple_pairs_with_domains=zip(tuple_pair, dom_to_intersect), space=sched_space, diff --git a/test/test_linearization_checker.py b/test/test_linearization_checker.py index 16c56274d..559a5c24e 100644 --- a/test/test_linearization_checker.py +++ b/test/test_linearization_checker.py @@ -46,7 +46,7 @@ faulthandler.enable() -def test_lexschedule_and_islmap_creation(): +def test_lexschedule_and_map_creation(): import islpy as isl from loopy.schedule.checker import ( get_schedule_for_statement_pair, @@ -135,34 +135,34 @@ def test_lexschedule_and_islmap_creation(): assert sched_ab.stmt_instance_before.lex_points == [0, 'i', 0, 'k', 0] assert sched_ab.stmt_instance_after.lex_points == [0, 'i', 1, 'j', 0] - # Get two isl maps from the PairwiseScheduleBuilder + # Get two maps from the PairwiseScheduleBuilder - isl_sched_before, isl_sched_after = sched_ab.build_maps(knl) + sched_map_before, sched_map_after = sched_ab.build_maps(knl) # Create expected maps, align, compare - isl_sched_before_expected = isl.Map( + sched_map_before_expected = isl.Map( "[pi, pk] -> { " "[_lp_linchk_statement=0, i, k] -> " "[_lp_linchk_l0=0, _lp_linchk_l1=i, _lp_linchk_l2=0, _lp_linchk_l3=k, " "_lp_linchk_l4=0] : " "0 <= i < pi and 0 <= k < pk }" ) - isl_sched_before_expected = align_isl_maps_by_var_names( - isl_sched_before_expected, isl_sched_before) + sched_map_before_expected = align_isl_maps_by_var_names( + sched_map_before_expected, sched_map_before) - isl_sched_after_expected = isl.Map( + sched_map_after_expected = isl.Map( "[pi, pj] -> { " "[_lp_linchk_statement=1, i, j] -> " "[_lp_linchk_l0=0, _lp_linchk_l1=i, _lp_linchk_l2=1, _lp_linchk_l3=j, " "_lp_linchk_l4=0] : " "0 <= i < pi and 0 <= j < pj }" ) - isl_sched_after_expected = align_isl_maps_by_var_names( - isl_sched_after_expected, isl_sched_after) + 
sched_map_after_expected = align_isl_maps_by_var_names( + sched_map_after_expected, sched_map_after) - assert isl_sched_before == isl_sched_before_expected - assert isl_sched_after == isl_sched_after_expected + assert sched_map_before == sched_map_before_expected + assert sched_map_after == sched_map_after_expected # ------------------------------------------------------------------------------ # Relationship between insn_a and insn_c --------------------------------------- @@ -170,34 +170,34 @@ def test_lexschedule_and_islmap_creation(): assert sched_ac.stmt_instance_before.lex_points == [0, 'i', 0, 'k', 0] assert sched_ac.stmt_instance_after.lex_points == [0, 'i', 1, 'j', 0] - # Get two isl maps from the PairwiseScheduleBuilder + # Get two maps from the PairwiseScheduleBuilder - isl_sched_before, isl_sched_after = sched_ac.build_maps(knl) + sched_map_before, sched_map_after = sched_ac.build_maps(knl) # Create expected maps, align, compare - isl_sched_before_expected = isl.Map( + sched_map_before_expected = isl.Map( "[pi, pk] -> { " "[_lp_linchk_statement=0, i, k] -> " "[_lp_linchk_l0=0, _lp_linchk_l1=i, _lp_linchk_l2=0, _lp_linchk_l3=k, " "_lp_linchk_l4=0] : " "0 <= i < pi and 0 <= k < pk }" ) - isl_sched_before_expected = align_isl_maps_by_var_names( - isl_sched_before_expected, isl_sched_before) + sched_map_before_expected = align_isl_maps_by_var_names( + sched_map_before_expected, sched_map_before) - isl_sched_after_expected = isl.Map( + sched_map_after_expected = isl.Map( "[pi, pj] -> { " "[_lp_linchk_statement=1, i, j] -> " "[_lp_linchk_l0=0, _lp_linchk_l1=i, _lp_linchk_l2=1, _lp_linchk_l3=j, " "_lp_linchk_l4=0] : " "0 <= i < pi and 0 <= j < pj }" ) - isl_sched_after_expected = align_isl_maps_by_var_names( - isl_sched_after_expected, isl_sched_after) + sched_map_after_expected = align_isl_maps_by_var_names( + sched_map_after_expected, sched_map_after) - assert isl_sched_before == isl_sched_before_expected - assert isl_sched_after == isl_sched_after_expected 
+ assert sched_map_before == sched_map_before_expected + assert sched_map_after == sched_map_after_expected # ------------------------------------------------------------------------------ # Relationship between insn_a and insn_d --------------------------------------- @@ -208,13 +208,13 @@ def perform_insn_ad_checks_with(sid_a, sid_d): assert sched_ad.stmt_instance_before.lex_points == [sid_a, 'i', 0, 'k', 0] assert sched_ad.stmt_instance_after.lex_points == [sid_d, 't', 0, 0, 0] - # Get two isl maps from the PairwiseScheduleBuilder + # Get two maps from the PairwiseScheduleBuilder - isl_sched_before, isl_sched_after = sched_ad.build_maps(knl) + sched_map_before, sched_map_after = sched_ad.build_maps(knl) # Create expected maps, align, compare - isl_sched_before_expected = isl.Map( + sched_map_before_expected = isl.Map( "[pi, pk] -> { " "[_lp_linchk_statement=%d, i, k] -> " "[_lp_linchk_l0=%d, _lp_linchk_l1=i, _lp_linchk_l2=0, _lp_linchk_l3=k, " @@ -222,10 +222,10 @@ def perform_insn_ad_checks_with(sid_a, sid_d): "0 <= i < pi and 0 <= k < pk }" % (sid_a, sid_a) ) - isl_sched_before_expected = align_isl_maps_by_var_names( - isl_sched_before_expected, isl_sched_before) + sched_map_before_expected = align_isl_maps_by_var_names( + sched_map_before_expected, sched_map_before) - isl_sched_after_expected = isl.Map( + sched_map_after_expected = isl.Map( "[pt] -> { " "[_lp_linchk_statement=%d, t] -> " "[_lp_linchk_l0=%d, _lp_linchk_l1=t, _lp_linchk_l2=0, _lp_linchk_l3=0, " @@ -233,11 +233,11 @@ def perform_insn_ad_checks_with(sid_a, sid_d): "0 <= t < pt }" % (sid_d, sid_d) ) - isl_sched_after_expected = align_isl_maps_by_var_names( - isl_sched_after_expected, isl_sched_after) + sched_map_after_expected = align_isl_maps_by_var_names( + sched_map_after_expected, sched_map_after) - assert isl_sched_before == isl_sched_before_expected - assert isl_sched_after == isl_sched_after_expected + assert sched_map_before == sched_map_before_expected + assert sched_map_after == 
sched_map_after_expected if sched_ad.stmt_instance_before.stmt_ref.int_id == 0: perform_insn_ad_checks_with(0, 1) @@ -253,13 +253,13 @@ def perform_insn_bc_checks_with(sid_b, sid_c): assert sched_bc.stmt_instance_before.lex_points == [0, 'i', 0, 'j', sid_b] assert sched_bc.stmt_instance_after.lex_points == [0, 'i', 0, 'j', sid_c] - # Get two isl maps from the PairwiseScheduleBuilder + # Get two maps from the PairwiseScheduleBuilder - isl_sched_before, isl_sched_after = sched_bc.build_maps(knl) + sched_map_before, sched_map_after = sched_bc.build_maps(knl) # Create expected maps, align, compare - isl_sched_before_expected = isl.Map( + sched_map_before_expected = isl.Map( "[pi, pj] -> { " "[_lp_linchk_statement=%d, i, j] -> " "[_lp_linchk_l0=0, _lp_linchk_l1=i, _lp_linchk_l2=0, _lp_linchk_l3=j, " @@ -267,10 +267,10 @@ def perform_insn_bc_checks_with(sid_b, sid_c): "0 <= i < pi and 0 <= j < pj }" % (sid_b, sid_b) ) - isl_sched_before_expected = align_isl_maps_by_var_names( - isl_sched_before_expected, isl_sched_before) + sched_map_before_expected = align_isl_maps_by_var_names( + sched_map_before_expected, sched_map_before) - isl_sched_after_expected = isl.Map( + sched_map_after_expected = isl.Map( "[pi, pj] -> { " "[_lp_linchk_statement=%d, i, j] -> " "[_lp_linchk_l0=0, _lp_linchk_l1=i, _lp_linchk_l2=0, _lp_linchk_l3=j, " @@ -278,11 +278,11 @@ def perform_insn_bc_checks_with(sid_b, sid_c): "0 <= i < pi and 0 <= j < pj }" % (sid_c, sid_c) ) - isl_sched_after_expected = align_isl_maps_by_var_names( - isl_sched_after_expected, isl_sched_after) + sched_map_after_expected = align_isl_maps_by_var_names( + sched_map_after_expected, sched_map_after) - assert isl_sched_before == isl_sched_before_expected - assert isl_sched_after == isl_sched_after_expected + assert sched_map_before == sched_map_before_expected + assert sched_map_after == sched_map_after_expected if sched_bc.stmt_instance_before.stmt_ref.int_id == 0: perform_insn_bc_checks_with(0, 1) @@ -298,13 +298,13 @@ def 
perform_insn_bd_checks_with(sid_b, sid_d): assert sched_bd.stmt_instance_before.lex_points == [sid_b, 'i', 0, 'j', 0] assert sched_bd.stmt_instance_after.lex_points == [sid_d, 't', 0, 0, 0] - # Get two isl maps from the PairwiseScheduleBuilder + # Get two maps from the PairwiseScheduleBuilder - isl_sched_before, isl_sched_after = sched_bd.build_maps(knl) + sched_map_before, sched_map_after = sched_bd.build_maps(knl) # Create expected maps, align, compare - isl_sched_before_expected = isl.Map( + sched_map_before_expected = isl.Map( "[pi, pj] -> { " "[_lp_linchk_statement=%d, i, j] -> " "[_lp_linchk_l0=%d, _lp_linchk_l1=i, _lp_linchk_l2=0, _lp_linchk_l3=j, " @@ -312,10 +312,10 @@ def perform_insn_bd_checks_with(sid_b, sid_d): "0 <= i < pi and 0 <= j < pj }" % (sid_b, sid_b) ) - isl_sched_before_expected = align_isl_maps_by_var_names( - isl_sched_before_expected, isl_sched_before) + sched_map_before_expected = align_isl_maps_by_var_names( + sched_map_before_expected, sched_map_before) - isl_sched_after_expected = isl.Map( + sched_map_after_expected = isl.Map( "[pt] -> { " "[_lp_linchk_statement=%d, t] -> " "[_lp_linchk_l0=%d, _lp_linchk_l1=t, _lp_linchk_l2=0, _lp_linchk_l3=0, " @@ -323,11 +323,11 @@ def perform_insn_bd_checks_with(sid_b, sid_d): "0 <= t < pt }" % (sid_d, sid_d) ) - isl_sched_after_expected = align_isl_maps_by_var_names( - isl_sched_after_expected, isl_sched_after) + sched_map_after_expected = align_isl_maps_by_var_names( + sched_map_after_expected, sched_map_after) - assert isl_sched_before == isl_sched_before_expected - assert isl_sched_after == isl_sched_after_expected + assert sched_map_before == sched_map_before_expected + assert sched_map_after == sched_map_after_expected if sched_bd.stmt_instance_before.stmt_ref.int_id == 0: perform_insn_bd_checks_with(0, 1) @@ -343,13 +343,13 @@ def perform_insn_cd_checks_with(sid_c, sid_d): assert sched_cd.stmt_instance_before.lex_points == [sid_c, 'i', 0, 'j', 0] assert sched_cd.stmt_instance_after.lex_points 
== [sid_d, 't', 0, 0, 0] - # Get two isl maps from the PairwiseScheduleBuilder + # Get two maps from the PairwiseScheduleBuilder - isl_sched_before, isl_sched_after = sched_cd.build_maps(knl) + sched_map_before, sched_map_after = sched_cd.build_maps(knl) # Create expected maps, align, compare - isl_sched_before_expected = isl.Map( + sched_map_before_expected = isl.Map( "[pi, pj] -> { " "[_lp_linchk_statement=%d, i, j] -> " "[_lp_linchk_l0=%d, _lp_linchk_l1=i, _lp_linchk_l2=0, _lp_linchk_l3=j, " @@ -357,10 +357,10 @@ def perform_insn_cd_checks_with(sid_c, sid_d): "0 <= i < pi and 0 <= j < pj }" % (sid_c, sid_c) ) - isl_sched_before_expected = align_isl_maps_by_var_names( - isl_sched_before_expected, isl_sched_before) + sched_map_before_expected = align_isl_maps_by_var_names( + sched_map_before_expected, sched_map_before) - isl_sched_after_expected = isl.Map( + sched_map_after_expected = isl.Map( "[pt] -> { " "[_lp_linchk_statement=%d, t] -> " "[_lp_linchk_l0=%d, _lp_linchk_l1=t, _lp_linchk_l2=0, _lp_linchk_l3=0, " @@ -368,11 +368,11 @@ def perform_insn_cd_checks_with(sid_c, sid_d): "0 <= t < pt }" % (sid_d, sid_d) ) - isl_sched_after_expected = align_isl_maps_by_var_names( - isl_sched_after_expected, isl_sched_after) + sched_map_after_expected = align_isl_maps_by_var_names( + sched_map_after_expected, sched_map_after) - assert isl_sched_before == isl_sched_before_expected - assert isl_sched_after == isl_sched_after_expected + assert sched_map_before == sched_map_before_expected + assert sched_map_after == sched_map_after_expected if sched_cd.stmt_instance_before.stmt_ref.int_id == 0: perform_insn_cd_checks_with(0, 1) From f528b58622953390e5930c4ba1543ae2c17abae9 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Mon, 8 Jun 2020 14:32:42 -0500 Subject: [PATCH 085/460] rename _union_of_isl_sets_or_maps()->_get_union() --- loopy/schedule/checker/utils.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/loopy/schedule/checker/utils.py 
b/loopy/schedule/checker/utils.py index 8e65d9905..4b51a16d1 100644 --- a/loopy/schedule/checker/utils.py +++ b/loopy/schedule/checker/utils.py @@ -143,9 +143,9 @@ def align_isl_maps_by_var_names(input_map, target_map): return aligned_input_map -def _union_of_isl_sets_or_maps(set_list): - union = set_list[0] - for s in set_list[1:]: +def _get_union(list_items): + union = list_items[0] + for s in list_items[1:]: union = union.union(s) return union @@ -247,7 +247,7 @@ def create_symbolic_map_from_tuples( all_maps.append( map_from_set.intersect_domain(dom_with_all_inames)) - return _union_of_isl_sets_or_maps(all_maps) + return _get_union(all_maps) def set_all_isl_space_names( From 7c2309ab23db59413b5fb3dbdf3cb58325087941 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Mon, 8 Jun 2020 14:42:59 -0500 Subject: [PATCH 086/460] rename local vars isl_sched_map_*->sched_map_* --- test/test_linearization_checker.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/test/test_linearization_checker.py b/test/test_linearization_checker.py index 3745564d2..5f7329ba1 100644 --- a/test/test_linearization_checker.py +++ b/test/test_linearization_checker.py @@ -451,7 +451,7 @@ def check_sio_for_insn_pair( ) # Get two isl maps from the PairwiseScheduleBuilder - isl_sched_map_before, isl_sched_map_after = sched_builder.build_maps(knl) + sched_map_before, sched_map_after = sched_builder.build_maps(knl) # get map representing lexicographic ordering sched_lex_order_map = sched_builder.get_lex_order_map_for_sched_space() @@ -461,8 +461,8 @@ def check_sio_for_insn_pair( # create statement instance ordering, # maps each statement instance to all statement instances occuring later sio = get_statement_ordering_map( - isl_sched_map_before, - isl_sched_map_after, + sched_map_before, + sched_map_after, sched_lex_order_map, ) From 070ca3a534ea135607cbfd0717b2caef9b1d5a49 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Mon, 8 Jun 2020 16:31:06 -0500 Subject: [PATCH 087/460] create 
map_names_match_check() function for checking conditions on isl set/map name matching; use it to verify conditions before calling islpy.align_space() (which doesn't perform all necessary checks); TODO after making sure this works: remove now unused calls to reorder_dims_by_name() --- loopy/schedule/checker/utils.py | 54 +++++++++++++++++++++++++++++---- 1 file changed, 48 insertions(+), 6 deletions(-) diff --git a/loopy/schedule/checker/utils.py b/loopy/schedule/checker/utils.py index 4b51a16d1..852b24735 100644 --- a/loopy/schedule/checker/utils.py +++ b/loopy/schedule/checker/utils.py @@ -44,6 +44,27 @@ def add_dims_to_isl_set(isl_set, dim_type, names, new_idx_start): return new_set +def map_names_match_check( + obj_map, + desired_names, + dim_type, + assert_subset=True, + assert_permutation=True, + ): + + obj_map_names = obj_map.space.get_var_names(dim_type) + if assert_permutation: + if not set(obj_map_names) == set(desired_names): + raise ValueError( + "Set of map names %s for dim %s does not match target set %s" + % (obj_map_names, dim_type, desired_names)) + elif assert_subset: + if not set(obj_map_names).issubset(desired_names): + raise ValueError( + "Map names %s for dim %s are not a subset of target names %s" + % (obj_map_names, dim_type, desired_names)) + + def reorder_dims_by_name( isl_set, dim_type, desired_dims_ordered, add_missing=False, new_names_are_permutation_only=False): @@ -75,7 +96,10 @@ def reorder_dims_by_name( """ - assert set(isl_set.get_var_names(dim_type)).issubset(desired_dims_ordered) + map_names_match_check( + isl_set, desired_dims_ordered, dim_type, + assert_subset=True, assert_permutation=False) + assert dim_type != isl.dim_type.param if new_names_are_permutation_only and ( @@ -117,13 +141,23 @@ def reorder_dims_by_name( def align_isl_maps_by_var_names(input_map, target_map): + # first make sure names match + for dt in [isl.dim_type.in_, isl.dim_type.out, isl.dim_type.param]: + map_names_match_check( + input_map, 
target_map.get_var_names(dt), dt, + assert_permutation=True) + + aligned_input_map = isl.align_spaces(input_map, target_map) + + # TODO remove once satisfied that above can replace below: + # align params - aligned_input_map = input_map.align_params(target_map.space) + _aligned_input_map = input_map.align_params(target_map.space) # align in_ dims target_map_in_names = target_map.space.get_var_names(isl.dim_type.in_) - aligned_input_map = reorder_dims_by_name( - aligned_input_map, + _aligned_input_map = reorder_dims_by_name( + _aligned_input_map, isl.dim_type.in_, target_map_in_names, add_missing=False, @@ -132,14 +166,22 @@ def align_isl_maps_by_var_names(input_map, target_map): # align out dims target_map_out_names = target_map.space.get_var_names(isl.dim_type.out) - aligned_input_map = reorder_dims_by_name( - aligned_input_map, + _aligned_input_map = reorder_dims_by_name( + _aligned_input_map, isl.dim_type.out, target_map_out_names, add_missing=False, new_names_are_permutation_only=True, ) + assert aligned_input_map == _aligned_input_map + assert aligned_input_map.get_var_names( + isl.dim_type.param) == _aligned_input_map.get_var_names(isl.dim_type.param) + assert aligned_input_map.get_var_names( + isl.dim_type.in_) == _aligned_input_map.get_var_names(isl.dim_type.in_) + assert aligned_input_map.get_var_names( + isl.dim_type.out) == _aligned_input_map.get_var_names(isl.dim_type.out) + return aligned_input_map From 2f980346c719722ef61fb36055ec31705f17c534 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Mon, 8 Jun 2020 16:48:33 -0500 Subject: [PATCH 088/460] rename align_isl_maps_by_var_names()->ensure_dim_names_match_and_align() --- loopy/schedule/checker/utils.py | 43 ++++-------------------------- test/test_linearization_checker.py | 26 +++++++++--------- 2 files changed, 18 insertions(+), 51 deletions(-) diff --git a/loopy/schedule/checker/utils.py b/loopy/schedule/checker/utils.py index 852b24735..2a42a098f 100644 --- a/loopy/schedule/checker/utils.py +++ 
b/loopy/schedule/checker/utils.py @@ -139,50 +139,17 @@ def reorder_dims_by_name( return new_set -def align_isl_maps_by_var_names(input_map, target_map): +def ensure_dim_names_match_and_align(obj_map, tgt_map): # first make sure names match for dt in [isl.dim_type.in_, isl.dim_type.out, isl.dim_type.param]: map_names_match_check( - input_map, target_map.get_var_names(dt), dt, + obj_map, tgt_map.get_var_names(dt), dt, assert_permutation=True) - aligned_input_map = isl.align_spaces(input_map, target_map) - - # TODO remove once satisfied that above can replace below: - - # align params - _aligned_input_map = input_map.align_params(target_map.space) - - # align in_ dims - target_map_in_names = target_map.space.get_var_names(isl.dim_type.in_) - _aligned_input_map = reorder_dims_by_name( - _aligned_input_map, - isl.dim_type.in_, - target_map_in_names, - add_missing=False, - new_names_are_permutation_only=True, - ) - - # align out dims - target_map_out_names = target_map.space.get_var_names(isl.dim_type.out) - _aligned_input_map = reorder_dims_by_name( - _aligned_input_map, - isl.dim_type.out, - target_map_out_names, - add_missing=False, - new_names_are_permutation_only=True, - ) - - assert aligned_input_map == _aligned_input_map - assert aligned_input_map.get_var_names( - isl.dim_type.param) == _aligned_input_map.get_var_names(isl.dim_type.param) - assert aligned_input_map.get_var_names( - isl.dim_type.in_) == _aligned_input_map.get_var_names(isl.dim_type.in_) - assert aligned_input_map.get_var_names( - isl.dim_type.out) == _aligned_input_map.get_var_names(isl.dim_type.out) - - return aligned_input_map + aligned_obj_map = isl.align_spaces(obj_map, tgt_map) + + return aligned_obj_map def _get_union(list_items): diff --git a/test/test_linearization_checker.py b/test/test_linearization_checker.py index 559a5c24e..41ac16feb 100644 --- a/test/test_linearization_checker.py +++ b/test/test_linearization_checker.py @@ -52,7 +52,7 @@ def test_lexschedule_and_map_creation(): 
get_schedule_for_statement_pair, ) from loopy.schedule.checker.utils import ( - align_isl_maps_by_var_names, + ensure_dim_names_match_and_align, ) # example kernel @@ -148,7 +148,7 @@ def test_lexschedule_and_map_creation(): "_lp_linchk_l4=0] : " "0 <= i < pi and 0 <= k < pk }" ) - sched_map_before_expected = align_isl_maps_by_var_names( + sched_map_before_expected = ensure_dim_names_match_and_align( sched_map_before_expected, sched_map_before) sched_map_after_expected = isl.Map( @@ -158,7 +158,7 @@ def test_lexschedule_and_map_creation(): "_lp_linchk_l4=0] : " "0 <= i < pi and 0 <= j < pj }" ) - sched_map_after_expected = align_isl_maps_by_var_names( + sched_map_after_expected = ensure_dim_names_match_and_align( sched_map_after_expected, sched_map_after) assert sched_map_before == sched_map_before_expected @@ -183,7 +183,7 @@ def test_lexschedule_and_map_creation(): "_lp_linchk_l4=0] : " "0 <= i < pi and 0 <= k < pk }" ) - sched_map_before_expected = align_isl_maps_by_var_names( + sched_map_before_expected = ensure_dim_names_match_and_align( sched_map_before_expected, sched_map_before) sched_map_after_expected = isl.Map( @@ -193,7 +193,7 @@ def test_lexschedule_and_map_creation(): "_lp_linchk_l4=0] : " "0 <= i < pi and 0 <= j < pj }" ) - sched_map_after_expected = align_isl_maps_by_var_names( + sched_map_after_expected = ensure_dim_names_match_and_align( sched_map_after_expected, sched_map_after) assert sched_map_before == sched_map_before_expected @@ -222,7 +222,7 @@ def perform_insn_ad_checks_with(sid_a, sid_d): "0 <= i < pi and 0 <= k < pk }" % (sid_a, sid_a) ) - sched_map_before_expected = align_isl_maps_by_var_names( + sched_map_before_expected = ensure_dim_names_match_and_align( sched_map_before_expected, sched_map_before) sched_map_after_expected = isl.Map( @@ -233,7 +233,7 @@ def perform_insn_ad_checks_with(sid_a, sid_d): "0 <= t < pt }" % (sid_d, sid_d) ) - sched_map_after_expected = align_isl_maps_by_var_names( + sched_map_after_expected = 
ensure_dim_names_match_and_align( sched_map_after_expected, sched_map_after) assert sched_map_before == sched_map_before_expected @@ -267,7 +267,7 @@ def perform_insn_bc_checks_with(sid_b, sid_c): "0 <= i < pi and 0 <= j < pj }" % (sid_b, sid_b) ) - sched_map_before_expected = align_isl_maps_by_var_names( + sched_map_before_expected = ensure_dim_names_match_and_align( sched_map_before_expected, sched_map_before) sched_map_after_expected = isl.Map( @@ -278,7 +278,7 @@ def perform_insn_bc_checks_with(sid_b, sid_c): "0 <= i < pi and 0 <= j < pj }" % (sid_c, sid_c) ) - sched_map_after_expected = align_isl_maps_by_var_names( + sched_map_after_expected = ensure_dim_names_match_and_align( sched_map_after_expected, sched_map_after) assert sched_map_before == sched_map_before_expected @@ -312,7 +312,7 @@ def perform_insn_bd_checks_with(sid_b, sid_d): "0 <= i < pi and 0 <= j < pj }" % (sid_b, sid_b) ) - sched_map_before_expected = align_isl_maps_by_var_names( + sched_map_before_expected = ensure_dim_names_match_and_align( sched_map_before_expected, sched_map_before) sched_map_after_expected = isl.Map( @@ -323,7 +323,7 @@ def perform_insn_bd_checks_with(sid_b, sid_d): "0 <= t < pt }" % (sid_d, sid_d) ) - sched_map_after_expected = align_isl_maps_by_var_names( + sched_map_after_expected = ensure_dim_names_match_and_align( sched_map_after_expected, sched_map_after) assert sched_map_before == sched_map_before_expected @@ -357,7 +357,7 @@ def perform_insn_cd_checks_with(sid_c, sid_d): "0 <= i < pi and 0 <= j < pj }" % (sid_c, sid_c) ) - sched_map_before_expected = align_isl_maps_by_var_names( + sched_map_before_expected = ensure_dim_names_match_and_align( sched_map_before_expected, sched_map_before) sched_map_after_expected = isl.Map( @@ -368,7 +368,7 @@ def perform_insn_cd_checks_with(sid_c, sid_d): "0 <= t < pt }" % (sid_d, sid_d) ) - sched_map_after_expected = align_isl_maps_by_var_names( + sched_map_after_expected = ensure_dim_names_match_and_align( sched_map_after_expected, 
sched_map_after) assert sched_map_before == sched_map_before_expected From 0f4269b86ae1d7b1863184b731d007bb8463324f Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Mon, 8 Jun 2020 16:50:25 -0500 Subject: [PATCH 089/460] update after renaming of align_isl_maps_by_var_names()->ensure_dim_names_match_and_align() --- test/test_linearization_checker.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/test_linearization_checker.py b/test/test_linearization_checker.py index 84decedca..5640da8b8 100644 --- a/test/test_linearization_checker.py +++ b/test/test_linearization_checker.py @@ -392,7 +392,7 @@ def test_statement_instance_ordering_creation(): get_schedule_for_statement_pair, ) from loopy.schedule.checker.utils import ( - align_isl_maps_by_var_names, + ensure_dim_names_match_and_align, append_marker_to_isl_map_var_names, ) from loopy.schedule.checker.lexicographic_order_map import ( @@ -466,7 +466,7 @@ def check_sio_for_insn_pair( sched_lex_order_map, ) - sio_aligned = align_isl_maps_by_var_names(sio, expected_sio) + sio_aligned = ensure_dim_names_match_and_align(sio, expected_sio) assert sio_aligned == expected_sio From c7ce50da737d3a6df80a26184fd6d5d7c3762d45 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Mon, 8 Jun 2020 17:00:06 -0500 Subject: [PATCH 090/460] use map_names_match_check() to perform name set match check inside reorder_dims_by_name() --- loopy/schedule/checker/utils.py | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/loopy/schedule/checker/utils.py b/loopy/schedule/checker/utils.py index 2a42a098f..b9de94921 100644 --- a/loopy/schedule/checker/utils.py +++ b/loopy/schedule/checker/utils.py @@ -98,18 +98,10 @@ def reorder_dims_by_name( map_names_match_check( isl_set, desired_dims_ordered, dim_type, - assert_subset=True, assert_permutation=False) + assert_subset=True, assert_permutation=new_names_are_permutation_only) assert dim_type != isl.dim_type.param - if new_names_are_permutation_only and ( - 
set(isl_set.get_var_names(dim_type)) - != set(desired_dims_ordered)): - raise ValueError( - "Var name sets must match with new_names_are_permutation_only=True. " - "isl vars: %s, desired dims: %s" - % (isl_set.get_var_names(dim_type), desired_dims_ordered)) - other_dim_type = isl.dim_type.param other_dim_len = len(isl_set.get_var_names(other_dim_type)) From d1a73971fd24902a0068f6c3cff3306c031b22e0 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Mon, 8 Jun 2020 17:21:48 -0500 Subject: [PATCH 091/460] rename reorder_dims_by_name()->insert_missing_dims_and_reorder_by_name(); remove params add_missing (now always true) and new_names_are_permutation_only (now always false) --- loopy/schedule/checker/utils.py | 38 +++++++++++---------------------- 1 file changed, 12 insertions(+), 26 deletions(-) diff --git a/loopy/schedule/checker/utils.py b/loopy/schedule/checker/utils.py index b9de94921..e862d166e 100644 --- a/loopy/schedule/checker/utils.py +++ b/loopy/schedule/checker/utils.py @@ -65,32 +65,21 @@ def map_names_match_check( % (obj_map_names, dim_type, desired_names)) -def reorder_dims_by_name( - isl_set, dim_type, desired_dims_ordered, - add_missing=False, new_names_are_permutation_only=False): +def insert_missing_dims_and_reorder_by_name( + isl_set, dim_type, desired_dims_ordered): """Return an isl_set with the dimensions in the specified order. :arg isl_set: A :class:`islpy.Set` whose dimensions are - to be reordered. + to be reordered and, if necessary, augmented with missing dimensions. :arg dim_type: A :class:`islpy.dim_type`, i.e., an :class:`int`, specifying the dimension to be reordered. :arg desired_dims_ordered: A :class:`list` of :class:`str` elements - representing the desired dimensions order by dimension name. - - :arg add_missing: A :class:`bool` specifying whether to insert - dimensions (by name) found in `desired_dims_ordered` that are not - present in `isl_set`. 
- - :arg new_names_are_permutation_only: A :class:`bool` indicating that - `desired_dims_ordered` contains the same names as the specified - dimensions in `isl_set`, and does not, e.g., contain additional - dimension names not found in `isl_set`. If set to True, and these - two sets of names do not match, an error is produced. + representing the desired dimensions in order by dimension name. :returns: An :class:`islpy.Set` matching `isl_set` with the - dimension order matching `desired_dims_ordered`, optionally + dimension order matching `desired_dims_ordered`, including additional dimensions present in `desred_dims_ordered` that are not present in `isl_set`. @@ -98,7 +87,7 @@ def reorder_dims_by_name( map_names_match_check( isl_set, desired_dims_ordered, dim_type, - assert_subset=True, assert_permutation=new_names_are_permutation_only) + assert_subset=True, assert_permutation=False) assert dim_type != isl.dim_type.param @@ -109,12 +98,11 @@ def reorder_dims_by_name( for desired_idx, name in enumerate(desired_dims_ordered): # if iname doesn't exist in set, add dim: if name not in new_set.get_var_names(dim_type): - if add_missing: - # insert missing dim in correct location - new_set = new_set.insert_dims( - dim_type, desired_idx, 1 - ).set_dim_name( - dim_type, desired_idx, name) + # insert missing dim in correct location + new_set = new_set.insert_dims( + dim_type, desired_idx, 1 + ).set_dim_name( + dim_type, desired_idx, name) else: # iname exists in set current_idx = new_set.find_dim_by_name(dim_type, name) if current_idx != desired_idx: @@ -237,11 +225,9 @@ def create_symbolic_map_from_tuples( # if there are any dimensions in dom that are missing from # map_from_set, we have a problem I think? # (assertion checks this in add_missing... 
- dom_with_all_inames = reorder_dims_by_name( + dom_with_all_inames = insert_missing_dims_and_reorder_by_name( dom, isl.dim_type.set, space_in_names, - add_missing=True, - new_names_are_permutation_only=False, ) # intersect domain with this map From 14105e5de024ebb657b25e2c29d95cd030291aab Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Thu, 11 Jun 2020 20:17:15 -0500 Subject: [PATCH 092/460] add class name to __str__ for StatementRef() --- loopy/schedule/checker/schedule.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/loopy/schedule/checker/schedule.py b/loopy/schedule/checker/schedule.py index 8c503b191..18337481c 100644 --- a/loopy/schedule/checker/schedule.py +++ b/loopy/schedule/checker/schedule.py @@ -69,7 +69,7 @@ def __str__(self): int_id = ":%d" % (self.int_id) else: int_id = "" - return "%s%s" % (self.insn_id, int_id) + return "%s(%s%s)" % (self.__class__.__name__, self.insn_id, int_id) class StatementInstanceSet(object): From 6058001645158d0ca04c59b8e40cc082b89319c8 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Tue, 30 Jun 2020 06:59:10 -0500 Subject: [PATCH 093/460] make the integer ids that are used to represent before/after statements in domain of PairwiseSchedule deterministic so that the 'before' statement is always 0 and 'after' statement is always 1 (unless they're the same statement) --- loopy/schedule/checker/schedule.py | 13 +++-- test/test_linearization_checker.py | 85 +++++++++++++++++++----------- 2 files changed, 62 insertions(+), 36 deletions(-) diff --git a/loopy/schedule/checker/schedule.py b/loopy/schedule/checker/schedule.py index 18337481c..e4a9a5864 100644 --- a/loopy/schedule/checker/schedule.py +++ b/loopy/schedule/checker/schedule.py @@ -167,6 +167,13 @@ def __init__( # PairwiseScheduleBuilder statements self.stmt_instance_before = None self.stmt_instance_after = None + + # Determine integer IDs that will represent each statement in mapping + # (dependency map creation assumes sid_before=0 and sid_after=1, 
unless + # before and after refer to same stmt, in which case sid_before=sid_after=0) + int_sid_before = 0 + int_sid_after = 0 if before_insn_id == after_insn_id else 1 + # TODO when/after dependencies are added, consider the possibility # of removing the two-statements-per-PairwiseScheduleBuilder limitation @@ -178,7 +185,6 @@ def __init__( # ordering, initially this as a 1-d point with value 0 next_insn_lex_tuple = [0] stmt_added_since_prev_block_at_tier = [False] - next_sid = 0 for linearization_item in linearization_items_ordered: if isinstance(linearization_item, EnterLoop): iname = linearization_item.iname @@ -241,7 +247,7 @@ def __init__( self.stmt_instance_before = StatementInstanceSet( StatementRef( insn_id=lp_insn_id, - int_id=next_sid, # int representing insn + int_id=int_sid_before, # int representing insn ), next_insn_lex_tuple[:]) stmt_added = True @@ -251,7 +257,7 @@ def __init__( self.stmt_instance_after = StatementInstanceSet( StatementRef( insn_id=lp_insn_id, - int_id=next_sid, # int representing insn + int_id=int_sid_after, # int representing insn ), next_insn_lex_tuple[:]) stmt_added = True @@ -262,7 +268,6 @@ def __init__( if stmt_added: # increment lex dim val enumerating items in current code block next_insn_lex_tuple[-1] = next_insn_lex_tuple[-1] + 1 - next_sid += 1 # all current (nested) blocks now contain a statement stmt_added_since_prev_block_at_tier = [True]*len( diff --git a/test/test_linearization_checker.py b/test/test_linearization_checker.py index 41ac16feb..520efba9b 100644 --- a/test/test_linearization_checker.py +++ b/test/test_linearization_checker.py @@ -130,6 +130,15 @@ def test_lexschedule_and_map_creation(): "insn_d", ) + # There are multiple potential linearization orders for this kernel, so when + # performing our comparisons for schedule correctness, we need to know which + # order loopy chose. 
+ from loopy.schedule import RunInstruction + linearized_insn_ord = [] + for item in linearization_items: + if isinstance(item, RunInstruction): + linearized_insn_ord.append(item.insn_id) + # Relationship between insn_a and insn_b --------------------------------------- assert sched_ab.stmt_instance_before.lex_points == [0, 'i', 0, 'k', 0] @@ -204,9 +213,10 @@ def test_lexschedule_and_map_creation(): # insn_a and insn_d could have been linearized in either order # (i loop could be before or after t loop) - def perform_insn_ad_checks_with(sid_a, sid_d): - assert sched_ad.stmt_instance_before.lex_points == [sid_a, 'i', 0, 'k', 0] - assert sched_ad.stmt_instance_after.lex_points == [sid_d, 't', 0, 0, 0] + def perform_insn_ad_checks_with(a_lex_idx, d_lex_idx): + assert sched_ad.stmt_instance_before.lex_points == [ + a_lex_idx, 'i', 0, 'k', 0] + assert sched_ad.stmt_instance_after.lex_points == [d_lex_idx, 't', 0, 0, 0] # Get two maps from the PairwiseScheduleBuilder @@ -216,22 +226,22 @@ def perform_insn_ad_checks_with(sid_a, sid_d): sched_map_before_expected = isl.Map( "[pi, pk] -> { " - "[_lp_linchk_statement=%d, i, k] -> " + "[_lp_linchk_statement=0, i, k] -> " "[_lp_linchk_l0=%d, _lp_linchk_l1=i, _lp_linchk_l2=0, _lp_linchk_l3=k, " "_lp_linchk_l4=0] : " "0 <= i < pi and 0 <= k < pk }" - % (sid_a, sid_a) + % (a_lex_idx) ) sched_map_before_expected = ensure_dim_names_match_and_align( sched_map_before_expected, sched_map_before) sched_map_after_expected = isl.Map( "[pt] -> { " - "[_lp_linchk_statement=%d, t] -> " + "[_lp_linchk_statement=1, t] -> " "[_lp_linchk_l0=%d, _lp_linchk_l1=t, _lp_linchk_l2=0, _lp_linchk_l3=0, " "_lp_linchk_l4=0] : " "0 <= t < pt }" - % (sid_d, sid_d) + % (d_lex_idx) ) sched_map_after_expected = ensure_dim_names_match_and_align( sched_map_after_expected, sched_map_after) @@ -239,9 +249,11 @@ def perform_insn_ad_checks_with(sid_a, sid_d): assert sched_map_before == sched_map_before_expected assert sched_map_after == sched_map_after_expected - 
if sched_ad.stmt_instance_before.stmt_ref.int_id == 0: + if linearized_insn_ord.index("insn_a") < linearized_insn_ord.index("insn_d"): + # insn_a was linearized first, check schedule accordingly perform_insn_ad_checks_with(0, 1) else: + # insn_d was linearized first, check schedule accordingly perform_insn_ad_checks_with(1, 0) # ------------------------------------------------------------------------------ @@ -249,9 +261,10 @@ def perform_insn_ad_checks_with(sid_a, sid_d): # insn_b and insn_c could have been linearized in either order # (i loop could be before or after t loop) - def perform_insn_bc_checks_with(sid_b, sid_c): - assert sched_bc.stmt_instance_before.lex_points == [0, 'i', 0, 'j', sid_b] - assert sched_bc.stmt_instance_after.lex_points == [0, 'i', 0, 'j', sid_c] + def perform_insn_bc_checks_with(b_lex_idx, c_lex_idx): + assert sched_bc.stmt_instance_before.lex_points == [ + 0, 'i', 0, 'j', b_lex_idx] + assert sched_bc.stmt_instance_after.lex_points == [0, 'i', 0, 'j', c_lex_idx] # Get two maps from the PairwiseScheduleBuilder @@ -261,22 +274,22 @@ def perform_insn_bc_checks_with(sid_b, sid_c): sched_map_before_expected = isl.Map( "[pi, pj] -> { " - "[_lp_linchk_statement=%d, i, j] -> " + "[_lp_linchk_statement=0, i, j] -> " "[_lp_linchk_l0=0, _lp_linchk_l1=i, _lp_linchk_l2=0, _lp_linchk_l3=j, " "_lp_linchk_l4=%d] : " "0 <= i < pi and 0 <= j < pj }" - % (sid_b, sid_b) + % (b_lex_idx) ) sched_map_before_expected = ensure_dim_names_match_and_align( sched_map_before_expected, sched_map_before) sched_map_after_expected = isl.Map( "[pi, pj] -> { " - "[_lp_linchk_statement=%d, i, j] -> " + "[_lp_linchk_statement=1, i, j] -> " "[_lp_linchk_l0=0, _lp_linchk_l1=i, _lp_linchk_l2=0, _lp_linchk_l3=j, " "_lp_linchk_l4=%d] : " "0 <= i < pi and 0 <= j < pj }" - % (sid_c, sid_c) + % (c_lex_idx) ) sched_map_after_expected = ensure_dim_names_match_and_align( sched_map_after_expected, sched_map_after) @@ -284,9 +297,11 @@ def perform_insn_bc_checks_with(sid_b, sid_c): 
assert sched_map_before == sched_map_before_expected assert sched_map_after == sched_map_after_expected - if sched_bc.stmt_instance_before.stmt_ref.int_id == 0: + if linearized_insn_ord.index("insn_b") < linearized_insn_ord.index("insn_c"): + # insn_b was linearized first, check schedule accordingly perform_insn_bc_checks_with(0, 1) else: + # insn_c was linearized first, check schedule accordingly perform_insn_bc_checks_with(1, 0) # ------------------------------------------------------------------------------ @@ -294,9 +309,10 @@ def perform_insn_bc_checks_with(sid_b, sid_c): # insn_b and insn_d could have been linearized in either order # (i loop could be before or after t loop) - def perform_insn_bd_checks_with(sid_b, sid_d): - assert sched_bd.stmt_instance_before.lex_points == [sid_b, 'i', 0, 'j', 0] - assert sched_bd.stmt_instance_after.lex_points == [sid_d, 't', 0, 0, 0] + def perform_insn_bd_checks_with(b_lex_idx, d_lex_idx): + assert sched_bd.stmt_instance_before.lex_points == [ + b_lex_idx, 'i', 0, 'j', 0] + assert sched_bd.stmt_instance_after.lex_points == [d_lex_idx, 't', 0, 0, 0] # Get two maps from the PairwiseScheduleBuilder @@ -306,22 +322,22 @@ def perform_insn_bd_checks_with(sid_b, sid_d): sched_map_before_expected = isl.Map( "[pi, pj] -> { " - "[_lp_linchk_statement=%d, i, j] -> " + "[_lp_linchk_statement=0, i, j] -> " "[_lp_linchk_l0=%d, _lp_linchk_l1=i, _lp_linchk_l2=0, _lp_linchk_l3=j, " "_lp_linchk_l4=0] : " "0 <= i < pi and 0 <= j < pj }" - % (sid_b, sid_b) + % (b_lex_idx) ) sched_map_before_expected = ensure_dim_names_match_and_align( sched_map_before_expected, sched_map_before) sched_map_after_expected = isl.Map( "[pt] -> { " - "[_lp_linchk_statement=%d, t] -> " + "[_lp_linchk_statement=1, t] -> " "[_lp_linchk_l0=%d, _lp_linchk_l1=t, _lp_linchk_l2=0, _lp_linchk_l3=0, " "_lp_linchk_l4=0] : " "0 <= t < pt }" - % (sid_d, sid_d) + % (d_lex_idx) ) sched_map_after_expected = ensure_dim_names_match_and_align( sched_map_after_expected, 
sched_map_after) @@ -329,9 +345,11 @@ def perform_insn_bd_checks_with(sid_b, sid_d): assert sched_map_before == sched_map_before_expected assert sched_map_after == sched_map_after_expected - if sched_bd.stmt_instance_before.stmt_ref.int_id == 0: + if linearized_insn_ord.index("insn_b") < linearized_insn_ord.index("insn_d"): + # insn_b was linearized first, check schedule accordingly perform_insn_bd_checks_with(0, 1) else: + # insn_d was linearized first, check schedule accordingly perform_insn_bd_checks_with(1, 0) # ------------------------------------------------------------------------------ @@ -339,9 +357,10 @@ def perform_insn_bd_checks_with(sid_b, sid_d): # insn_c and insn_d could have been linearized in either order # (i loop could be before or after t loop) - def perform_insn_cd_checks_with(sid_c, sid_d): - assert sched_cd.stmt_instance_before.lex_points == [sid_c, 'i', 0, 'j', 0] - assert sched_cd.stmt_instance_after.lex_points == [sid_d, 't', 0, 0, 0] + def perform_insn_cd_checks_with(c_lex_idx, d_lex_idx): + assert sched_cd.stmt_instance_before.lex_points == [ + c_lex_idx, 'i', 0, 'j', 0] + assert sched_cd.stmt_instance_after.lex_points == [d_lex_idx, 't', 0, 0, 0] # Get two maps from the PairwiseScheduleBuilder @@ -351,22 +370,22 @@ def perform_insn_cd_checks_with(sid_c, sid_d): sched_map_before_expected = isl.Map( "[pi, pj] -> { " - "[_lp_linchk_statement=%d, i, j] -> " + "[_lp_linchk_statement=0, i, j] -> " "[_lp_linchk_l0=%d, _lp_linchk_l1=i, _lp_linchk_l2=0, _lp_linchk_l3=j, " "_lp_linchk_l4=0] : " "0 <= i < pi and 0 <= j < pj }" - % (sid_c, sid_c) + % (c_lex_idx) ) sched_map_before_expected = ensure_dim_names_match_and_align( sched_map_before_expected, sched_map_before) sched_map_after_expected = isl.Map( "[pt] -> { " - "[_lp_linchk_statement=%d, t] -> " + "[_lp_linchk_statement=1, t] -> " "[_lp_linchk_l0=%d, _lp_linchk_l1=t, _lp_linchk_l2=0, _lp_linchk_l3=0, " "_lp_linchk_l4=0] : " "0 <= t < pt }" - % (sid_d, sid_d) + % (d_lex_idx) ) 
sched_map_after_expected = ensure_dim_names_match_and_align( sched_map_after_expected, sched_map_after) @@ -374,9 +393,11 @@ def perform_insn_cd_checks_with(sid_c, sid_d): assert sched_map_before == sched_map_before_expected assert sched_map_after == sched_map_after_expected - if sched_cd.stmt_instance_before.stmt_ref.int_id == 0: + if linearized_insn_ord.index("insn_c") < linearized_insn_ord.index("insn_d"): + # insn_c was linearized first, check schedule accordingly perform_insn_cd_checks_with(0, 1) else: + # insn_d was linearized first, check schedule accordingly perform_insn_cd_checks_with(1, 0) From ffdca113b9c9fef44fdf3ff0fa70c77f399d0f2a Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Tue, 7 Jul 2020 00:08:06 -0500 Subject: [PATCH 094/460] eliminate 'dom_inames_ordered' args in build_maps() --- loopy/schedule/checker/schedule.py | 27 ++++----------------------- 1 file changed, 4 insertions(+), 23 deletions(-) diff --git a/loopy/schedule/checker/schedule.py b/loopy/schedule/checker/schedule.py index e4a9a5864..bcdbb6346 100644 --- a/loopy/schedule/checker/schedule.py +++ b/loopy/schedule/checker/schedule.py @@ -311,8 +311,6 @@ def _pad_lex_tuple_with_zeros(stmt_inst, length): def build_maps( self, knl, - dom_inames_ordered_before=None, - dom_inames_ordered_after=None, ): r"""Create a pair of :class:`islpy.Map`\ s representing a pairwise schedule as two mappings from statement instances to lexicographic time, @@ -323,18 +321,6 @@ def build_maps( kernel will be used to get the domains associated with the inames used in the statements. - :arg dom_inames_ordered_before: A list of :class:`str` - representing the union of inames used in - ``stmt_instance_before``. ``statement_var_name`` and - ``dom_inames_ordered_before`` are the names of the dims of - the space of the ISL map domain. - - :arg dom_inames_ordered_after: A list of :class:`str` - representing the union of inames used in - ``stmt_instance_after``. 
``statement_var_name`` and - ``dom_inames_ordered_after`` are the names of the dims of - the space of the ISL map domain. - :returns: A two-tuple containing two :class:`islpy.Map`s representing the a pairwise schedule as two mappings from statement instances to lexicographic time, one for @@ -352,7 +338,7 @@ def build_maps( params_sched = [] out_names_sched = self.get_lex_var_names() - def _get_map_for_stmt_inst(stmt_inst, dom_inames_ordered): + def _get_map_for_stmt_inst(stmt_inst): # Get inames domain for statement instance (a BasicSet) dom = knl.get_inames_domain( @@ -361,8 +347,7 @@ def _get_map_for_stmt_inst(stmt_inst, dom_inames_ordered): # create space (an isl space in current implementation) # {('statement', used in statement domain>) -> # (lexicographic ordering dims)} - if dom_inames_ordered is None: - dom_inames_ordered = list_var_names_in_isl_sets([dom]) + dom_inames_ordered = list_var_names_in_isl_sets([dom]) in_names_sched = [ self.statement_var_name] + dom_inames_ordered[:] @@ -390,12 +375,8 @@ def _get_map_for_stmt_inst(stmt_inst, dom_inames_ordered): space=sched_space, ) - map_before = _get_map_for_stmt_inst( - self.stmt_instance_before, - dom_inames_ordered_before) - map_after = _get_map_for_stmt_inst( - self.stmt_instance_after, - dom_inames_ordered_after) + map_before = _get_map_for_stmt_inst(self.stmt_instance_before) + map_after = _get_map_for_stmt_inst(self.stmt_instance_after) return (map_before, map_after) From 72a91ab7b3c8b3b482eb800b8a65518257ba75e7 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Tue, 7 Jul 2020 00:58:35 -0500 Subject: [PATCH 095/460] remove class variables statement_var_name and lex_var_prefix from schedule class; make them module-level variables that can be imported; (reducing state that is maintained in schedule objects) --- loopy/schedule/checker/schedule.py | 53 ++++++++++++++++++------------ 1 file changed, 32 insertions(+), 21 deletions(-) diff --git a/loopy/schedule/checker/schedule.py 
b/loopy/schedule/checker/schedule.py index bcdbb6346..6e1faf0a4 100644 --- a/loopy/schedule/checker/schedule.py +++ b/loopy/schedule/checker/schedule.py @@ -23,6 +23,34 @@ import islpy as isl +STATEMENT_VAR_NAME = "_lp_linchk_statement" + + +def set_statement_var_name(name): + """Set the :class:`str` specifying the name of the variable used + to represent the unique :class:`int` statement id in a + pairwise schedule. + """ + global STATEMENT_VAR_NAME + STATEMENT_VAR_NAME = name + + +LEX_VAR_PREFIX = "_lp_linchk_l" + + +def set_lex_var_prefix(name): + """Set the :class:`str` specifying the prefix to be used for the variables + representing the dimensions in the lexicographic ordering used in a + pairwise schedule. + + E.g., a prefix of "_lp_linchk_lex" might yield lexicographic dimension + variables "_lp_linchk_lex0", "_lp_linchk_lex1", "_lp_linchk_lex2". Cf. + :ref:`reserved-identifiers`. + """ + global LEX_VAR_PREFIX + LEX_VAR_PREFIX = name + + class StatementRef(object): """A reference to a :mod:`loopy` statement. @@ -128,23 +156,8 @@ class PairwiseScheduleBuilder(object): in a single lexicographic ordering. Points in lexicographic ordering are represented as a list of :class:`int` or as :class:`str` :mod:`loopy` inames. - - .. attribute:: statement_var_name - - A :class:`str` specifying the name of the variable used - to represent the unique :class:`int` statement id. - - .. attribute:: lex_var_prefix - - A :class:`str` specifying the prefix to be used for the variables - representing the dimensions in the lexicographic ordering. E.g., - a prefix of "_lp_linchk_lex" might yield variables "_lp_linchk_lex0", - "_lp_linchk_lex1", "_lp_linchk_lex2". Cf. :ref:`reserved-identifiers`. 
""" - statement_var_name = "_lp_linchk_statement" - lex_var_prefix = "_lp_linchk_l" - def __init__( self, linearization_items_ordered, @@ -349,8 +362,7 @@ def _get_map_for_stmt_inst(stmt_inst): # (lexicographic ordering dims)} dom_inames_ordered = list_var_names_in_isl_sets([dom]) - in_names_sched = [ - self.statement_var_name] + dom_inames_ordered[:] + in_names_sched = [STATEMENT_VAR_NAME] + dom_inames_ordered[:] sched_space = get_isl_space( params_sched, in_names_sched, out_names_sched) @@ -358,7 +370,7 @@ def _get_map_for_stmt_inst(stmt_inst): # for intersection with sched map later dom_to_intersect = [ add_dims_to_isl_set( - dom, isl.dim_type.set, [self.statement_var_name], 0), ] + dom, isl.dim_type.set, [STATEMENT_VAR_NAME], 0), ] # Each map representing the schedule will map # statement instances -> lex time. @@ -381,14 +393,13 @@ def _get_map_for_stmt_inst(stmt_inst): return (map_before, map_after) def get_lex_var_names(self): - return [self.lex_var_prefix+str(i) - for i in range(self.max_lex_dims())] + return [LEX_VAR_PREFIX+str(i) for i in range(self.max_lex_dims())] def __str__(self): def stringify_sched_stmt_instance(stmt_inst): return "{\n[%s=%s,] -> %s;\n}" % ( - self.statement_var_name, + STATEMENT_VAR_NAME, stmt_inst.stmt_ref.int_id, stmt_inst.lex_points) From 67bbe8b3ac14b7a05be3c14b1f0ab3000ae8987a Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Fri, 10 Jul 2020 06:40:29 -0500 Subject: [PATCH 096/460] remove setters for sched checking constants; make __doc__ string; add LIN_CHECK_IDENTIFIER_PREFIX --- loopy/schedule/checker/schedule.py | 32 +++++++++++++----------------- 1 file changed, 14 insertions(+), 18 deletions(-) diff --git a/loopy/schedule/checker/schedule.py b/loopy/schedule/checker/schedule.py index 6e1faf0a4..00a9ead51 100644 --- a/loopy/schedule/checker/schedule.py +++ b/loopy/schedule/checker/schedule.py @@ -22,33 +22,29 @@ import islpy as isl +__doc__ = """ -STATEMENT_VAR_NAME = "_lp_linchk_statement" +.. 
data:: LIN_CHECK_IDENTIFIER_PREFIX + The prefix for identifiers involved in linearization checking. -def set_statement_var_name(name): - """Set the :class:`str` specifying the name of the variable used - to represent the unique :class:`int` statement id in a - pairwise schedule. - """ - global STATEMENT_VAR_NAME - STATEMENT_VAR_NAME = name - +.. data:: LEX_VAR_PREFIX -LEX_VAR_PREFIX = "_lp_linchk_l" + E.g., a prefix of "_lp_linchk_lex" might yield lexicographic dimension + variables "_lp_linchk_lex0", "_lp_linchk_lex1", "_lp_linchk_lex2". Cf. + :ref:`reserved-identifiers`. +.. data:: STATEMENT_VAR_NAME -def set_lex_var_prefix(name): - """Set the :class:`str` specifying the prefix to be used for the variables + Set the :class:`str` specifying the prefix to be used for the variables representing the dimensions in the lexicographic ordering used in a pairwise schedule. - E.g., a prefix of "_lp_linchk_lex" might yield lexicographic dimension - variables "_lp_linchk_lex0", "_lp_linchk_lex1", "_lp_linchk_lex2". Cf. - :ref:`reserved-identifiers`. 
- """ - global LEX_VAR_PREFIX - LEX_VAR_PREFIX = name +""" + +LIN_CHECK_IDENTIFIER_PREFIX = "_lp_linchk_" +LEX_VAR_PREFIX = "%sl" % (LIN_CHECK_IDENTIFIER_PREFIX) +STATEMENT_VAR_NAME = "%sstatement" % (LIN_CHECK_IDENTIFIER_PREFIX) class StatementRef(object): From b54ed6f839c88618582085dc6d1393c5c3656a1a Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Fri, 10 Jul 2020 07:21:19 -0500 Subject: [PATCH 097/460] use STATEMENT_VAR_NAME and LEX_VAR_PREFIX constants when building test maps; use function to make test map creation easier --- test/test_linearization_checker.py | 139 +++++++++++++++-------------- 1 file changed, 71 insertions(+), 68 deletions(-) diff --git a/test/test_linearization_checker.py b/test/test_linearization_checker.py index 520efba9b..d3042b36c 100644 --- a/test/test_linearization_checker.py +++ b/test/test_linearization_checker.py @@ -35,6 +35,10 @@ preprocess_kernel, get_one_linearized_kernel, ) +from loopy.schedule.checker.schedule import ( + LEX_VAR_PREFIX, + STATEMENT_VAR_NAME, +) logger = logging.getLogger(__name__) @@ -139,6 +143,13 @@ def test_lexschedule_and_map_creation(): if isinstance(item, RunInstruction): linearized_insn_ord.append(item.insn_id) + def _lex_space_string(dim_vals): + # Return a string describing lex space dimension assignments + # (used to create maps below) + return ", ".join( + ["%s%d=%s" % (LEX_VAR_PREFIX, idx, str(val)) + for idx, val in enumerate(dim_vals)]) + # Relationship between insn_a and insn_b --------------------------------------- assert sched_ab.stmt_instance_before.lex_points == [0, 'i', 0, 'k', 0] @@ -151,21 +162,21 @@ def test_lexschedule_and_map_creation(): # Create expected maps, align, compare sched_map_before_expected = isl.Map( - "[pi, pk] -> { " - "[_lp_linchk_statement=0, i, k] -> " - "[_lp_linchk_l0=0, _lp_linchk_l1=i, _lp_linchk_l2=0, _lp_linchk_l3=k, " - "_lp_linchk_l4=0] : " - "0 <= i < pi and 0 <= k < pk }" + "[pi, pk] -> { [%s=0, i, k] -> [%s] : 0 <= i < pi and 0 <= k < pk }" + % ( + 
STATEMENT_VAR_NAME, + _lex_space_string(["0", "i", "0", "k", "0"]), + ) ) sched_map_before_expected = ensure_dim_names_match_and_align( sched_map_before_expected, sched_map_before) sched_map_after_expected = isl.Map( - "[pi, pj] -> { " - "[_lp_linchk_statement=1, i, j] -> " - "[_lp_linchk_l0=0, _lp_linchk_l1=i, _lp_linchk_l2=1, _lp_linchk_l3=j, " - "_lp_linchk_l4=0] : " - "0 <= i < pi and 0 <= j < pj }" + "[pi, pj] -> { [%s=1, i, j] -> [%s] : 0 <= i < pi and 0 <= j < pj }" + % ( + STATEMENT_VAR_NAME, + _lex_space_string(["0", "i", "1", "j", "0"]), + ) ) sched_map_after_expected = ensure_dim_names_match_and_align( sched_map_after_expected, sched_map_after) @@ -186,21 +197,21 @@ def test_lexschedule_and_map_creation(): # Create expected maps, align, compare sched_map_before_expected = isl.Map( - "[pi, pk] -> { " - "[_lp_linchk_statement=0, i, k] -> " - "[_lp_linchk_l0=0, _lp_linchk_l1=i, _lp_linchk_l2=0, _lp_linchk_l3=k, " - "_lp_linchk_l4=0] : " - "0 <= i < pi and 0 <= k < pk }" + "[pi, pk] -> { [%s=0, i, k] -> [%s] : 0 <= i < pi and 0 <= k < pk }" + % ( + STATEMENT_VAR_NAME, + _lex_space_string(["0", "i", "0", "k", "0"]), + ) ) sched_map_before_expected = ensure_dim_names_match_and_align( sched_map_before_expected, sched_map_before) sched_map_after_expected = isl.Map( - "[pi, pj] -> { " - "[_lp_linchk_statement=1, i, j] -> " - "[_lp_linchk_l0=0, _lp_linchk_l1=i, _lp_linchk_l2=1, _lp_linchk_l3=j, " - "_lp_linchk_l4=0] : " - "0 <= i < pi and 0 <= j < pj }" + "[pi, pj] -> { [%s=1, i, j] -> [%s] : 0 <= i < pi and 0 <= j < pj }" + % ( + STATEMENT_VAR_NAME, + _lex_space_string(["0", "i", "1", "j", "0"]), + ) ) sched_map_after_expected = ensure_dim_names_match_and_align( sched_map_after_expected, sched_map_after) @@ -225,23 +236,21 @@ def perform_insn_ad_checks_with(a_lex_idx, d_lex_idx): # Create expected maps, align, compare sched_map_before_expected = isl.Map( - "[pi, pk] -> { " - "[_lp_linchk_statement=0, i, k] -> " - "[_lp_linchk_l0=%d, _lp_linchk_l1=i, 
_lp_linchk_l2=0, _lp_linchk_l3=k, " - "_lp_linchk_l4=0] : " - "0 <= i < pi and 0 <= k < pk }" - % (a_lex_idx) + "[pi, pk] -> { [%s=0, i, k] -> [%s] : 0 <= i < pi and 0 <= k < pk }" + % ( + STATEMENT_VAR_NAME, + _lex_space_string([a_lex_idx, "i", "0", "k", "0"]), + ) ) sched_map_before_expected = ensure_dim_names_match_and_align( sched_map_before_expected, sched_map_before) sched_map_after_expected = isl.Map( - "[pt] -> { " - "[_lp_linchk_statement=1, t] -> " - "[_lp_linchk_l0=%d, _lp_linchk_l1=t, _lp_linchk_l2=0, _lp_linchk_l3=0, " - "_lp_linchk_l4=0] : " - "0 <= t < pt }" - % (d_lex_idx) + "[pt] -> { [%s=1, t] -> [%s] : 0 <= t < pt }" + % ( + STATEMENT_VAR_NAME, + _lex_space_string([d_lex_idx, "t", "0", "0", "0"]), + ) ) sched_map_after_expected = ensure_dim_names_match_and_align( sched_map_after_expected, sched_map_after) @@ -273,23 +282,21 @@ def perform_insn_bc_checks_with(b_lex_idx, c_lex_idx): # Create expected maps, align, compare sched_map_before_expected = isl.Map( - "[pi, pj] -> { " - "[_lp_linchk_statement=0, i, j] -> " - "[_lp_linchk_l0=0, _lp_linchk_l1=i, _lp_linchk_l2=0, _lp_linchk_l3=j, " - "_lp_linchk_l4=%d] : " - "0 <= i < pi and 0 <= j < pj }" - % (b_lex_idx) + "[pi, pj] -> { [%s=0, i, j] -> [%s] : 0 <= i < pi and 0 <= j < pj }" + % ( + STATEMENT_VAR_NAME, + _lex_space_string(["0", "i", "0", "j", b_lex_idx]), + ) ) sched_map_before_expected = ensure_dim_names_match_and_align( sched_map_before_expected, sched_map_before) sched_map_after_expected = isl.Map( - "[pi, pj] -> { " - "[_lp_linchk_statement=1, i, j] -> " - "[_lp_linchk_l0=0, _lp_linchk_l1=i, _lp_linchk_l2=0, _lp_linchk_l3=j, " - "_lp_linchk_l4=%d] : " - "0 <= i < pi and 0 <= j < pj }" - % (c_lex_idx) + "[pi, pj] -> { [%s=1, i, j] -> [%s] : 0 <= i < pi and 0 <= j < pj }" + % ( + STATEMENT_VAR_NAME, + _lex_space_string(["0", "i", "0", "j", c_lex_idx]), + ) ) sched_map_after_expected = ensure_dim_names_match_and_align( sched_map_after_expected, sched_map_after) @@ -321,23 +328,21 @@ def 
perform_insn_bd_checks_with(b_lex_idx, d_lex_idx): # Create expected maps, align, compare sched_map_before_expected = isl.Map( - "[pi, pj] -> { " - "[_lp_linchk_statement=0, i, j] -> " - "[_lp_linchk_l0=%d, _lp_linchk_l1=i, _lp_linchk_l2=0, _lp_linchk_l3=j, " - "_lp_linchk_l4=0] : " - "0 <= i < pi and 0 <= j < pj }" - % (b_lex_idx) + "[pi, pj] -> { [%s=0, i, j] -> [%s] : 0 <= i < pi and 0 <= j < pj }" + % ( + STATEMENT_VAR_NAME, + _lex_space_string([b_lex_idx, "i", "0", "j", "0"]), + ) ) sched_map_before_expected = ensure_dim_names_match_and_align( sched_map_before_expected, sched_map_before) sched_map_after_expected = isl.Map( - "[pt] -> { " - "[_lp_linchk_statement=1, t] -> " - "[_lp_linchk_l0=%d, _lp_linchk_l1=t, _lp_linchk_l2=0, _lp_linchk_l3=0, " - "_lp_linchk_l4=0] : " - "0 <= t < pt }" - % (d_lex_idx) + "[pt] -> { [%s=1, t] -> [%s] : 0 <= t < pt }" + % ( + STATEMENT_VAR_NAME, + _lex_space_string([d_lex_idx, "t", "0", "0", "0"]), + ) ) sched_map_after_expected = ensure_dim_names_match_and_align( sched_map_after_expected, sched_map_after) @@ -369,23 +374,21 @@ def perform_insn_cd_checks_with(c_lex_idx, d_lex_idx): # Create expected maps, align, compare sched_map_before_expected = isl.Map( - "[pi, pj] -> { " - "[_lp_linchk_statement=0, i, j] -> " - "[_lp_linchk_l0=%d, _lp_linchk_l1=i, _lp_linchk_l2=0, _lp_linchk_l3=j, " - "_lp_linchk_l4=0] : " - "0 <= i < pi and 0 <= j < pj }" - % (c_lex_idx) + "[pi, pj] -> { [%s=0, i, j] -> [%s] : 0 <= i < pi and 0 <= j < pj }" + % ( + STATEMENT_VAR_NAME, + _lex_space_string([c_lex_idx, "i", "0", "j", "0"]), + ) ) sched_map_before_expected = ensure_dim_names_match_and_align( sched_map_before_expected, sched_map_before) sched_map_after_expected = isl.Map( - "[pt] -> { " - "[_lp_linchk_statement=1, t] -> " - "[_lp_linchk_l0=%d, _lp_linchk_l1=t, _lp_linchk_l2=0, _lp_linchk_l3=0, " - "_lp_linchk_l4=0] : " - "0 <= t < pt }" - % (d_lex_idx) + "[pt] -> { [%s=1, t] -> [%s] : 0 <= t < pt }" + % ( + STATEMENT_VAR_NAME, + 
_lex_space_string([d_lex_idx, "t", "0", "0", "0"]), + ) ) sched_map_after_expected = ensure_dim_names_match_and_align( sched_map_after_expected, sched_map_after) From c549f652e739af191d0297e5b2621bdbe33d44a2 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Fri, 10 Jul 2020 07:33:43 -0500 Subject: [PATCH 098/460] use STATEMENT_VAR_NAME and LEX_VAR_PREFIX constants when building test maps --- test/test_linearization_checker.py | 76 ++++++++++++------------------ 1 file changed, 31 insertions(+), 45 deletions(-) diff --git a/test/test_linearization_checker.py b/test/test_linearization_checker.py index 2dc12b451..208d9350e 100644 --- a/test/test_linearization_checker.py +++ b/test/test_linearization_checker.py @@ -494,35 +494,21 @@ def check_sio_for_insn_pair( assert sio_aligned == expected_sio - expected_lex_order_map = isl.Map("{ " - "[_lp_linchk_l0', _lp_linchk_l1', _lp_linchk_l2', _lp_linchk_l3', " - "_lp_linchk_l4']" - " -> " - "[_lp_linchk_l0, _lp_linchk_l1, _lp_linchk_l2, _lp_linchk_l3, " - "_lp_linchk_l4]" - ":" + expected_lex_order_map = isl.Map( + "{{ " + "[{0}0', {0}1', {0}2', {0}3', {0}4'] -> [{0}0, {0}1, {0}2, {0}3, {0}4] :" "(" - "_lp_linchk_l0' < _lp_linchk_l0 " + "{0}0' < {0}0 " ") or (" - "_lp_linchk_l0'= _lp_linchk_l0 and " - "_lp_linchk_l1' < _lp_linchk_l1 " + "{0}0'={0}0 and {0}1' < {0}1 " ") or (" - "_lp_linchk_l0'= _lp_linchk_l0 and " - "_lp_linchk_l1'= _lp_linchk_l1 and " - "_lp_linchk_l2' < _lp_linchk_l2 " + "{0}0'={0}0 and {0}1'={0}1 and {0}2' < {0}2 " ") or (" - "_lp_linchk_l0'= _lp_linchk_l0 and " - "_lp_linchk_l1'= _lp_linchk_l1 and " - "_lp_linchk_l2'= _lp_linchk_l2 and " - "_lp_linchk_l3' < _lp_linchk_l3 " + "{0}0'={0}0 and {0}1'={0}1 and {0}2'={0}2 and {0}3' < {0}3 " ") or (" - "_lp_linchk_l0'= _lp_linchk_l0 and " - "_lp_linchk_l1'= _lp_linchk_l1 and " - "_lp_linchk_l2'= _lp_linchk_l2 and " - "_lp_linchk_l3'= _lp_linchk_l3 and " - "_lp_linchk_l4' < _lp_linchk_l4" + "{0}0'={0}0 and {0}1'={0}1 and {0}2'={0}2 and {0}3'={0}3 and {0}4' < {0}4" 
")" - "}") + "}}".format(LEX_VAR_PREFIX)) # Isl ignores these apostrophes, but test would still pass since it ignores # variable names when checking for equality. Even so, explicitly add apostrophes @@ -533,12 +519,12 @@ def check_sio_for_insn_pair( # Relationship between insn_a and insn_b --------------------------------------- expected_sio = isl.Map( - "[pi, pj, pk] -> { " - "[_lp_linchk_statement'=0, i', k'] -> [_lp_linchk_statement=1, i, j]:" + "[pi, pj, pk] -> {{ " + "[{0}'=0, i', k'] -> [{0}=1, i, j] : " "0 <= i' < pi and 0 <= k' < pk and 0 <= j < pj and 0 <= i < pi and i > i'; " - "[_lp_linchk_statement'=0, i', k'] -> [_lp_linchk_statement=1, i=i', j]:" + "[{0}'=0, i', k'] -> [{0}=1, i=i', j] : " "0 <= i' < pi and 0 <= k' < pk and 0 <= j < pj " - "}" + "}}".format(STATEMENT_VAR_NAME) ) # isl ignores these apostrophes, so explicitly add them expected_sio = append_marker_to_isl_map_var_names( @@ -550,12 +536,12 @@ def check_sio_for_insn_pair( # Relationship between insn_a and insn_c --------------------------------------- expected_sio = isl.Map( - "[pi, pj, pk] -> { " - "[_lp_linchk_statement'=0, i', k'] -> [_lp_linchk_statement=1, i, j]:" + "[pi, pj, pk] -> {{ " + "[{0}'=0, i', k'] -> [{0}=1, i, j] : " "0 <= i' < pi and 0 <= k' < pk and 0 <= j < pj and 0 <= i < pi and i > i'; " - "[_lp_linchk_statement'=0, i', k'] -> [_lp_linchk_statement=1, i=i', j]:" + "[{0}'=0, i', k'] -> [{0}=1, i=i', j] : " "0 <= i' < pi and 0 <= k' < pk and 0 <= j < pj " - "}" + "}}".format(STATEMENT_VAR_NAME) ) # isl ignores these apostrophes, so explicitly add them expected_sio = append_marker_to_isl_map_var_names( @@ -567,10 +553,10 @@ def check_sio_for_insn_pair( # Relationship between insn_a and insn_d --------------------------------------- expected_sio = isl.Map( - "[pt, pi, pk] -> { " - "[_lp_linchk_statement'=0, i', k'] -> [_lp_linchk_statement=1, t]:" + "[pt, pi, pk] -> {{ " + "[{0}'=0, i', k'] -> [{0}=1, t] : " "0 <= i' < pi and 0 <= k' < pk and 0 <= t < pt " - "}" + 
"}}".format(STATEMENT_VAR_NAME) ) # isl ignores these apostrophes, so explicitly add them expected_sio = append_marker_to_isl_map_var_names( @@ -582,14 +568,14 @@ def check_sio_for_insn_pair( # Relationship between insn_b and insn_c --------------------------------------- expected_sio = isl.Map( - "[pi, pj] -> { " - "[_lp_linchk_statement'=0, i', j'] -> [_lp_linchk_statement=1, i, j]:" + "[pi, pj] -> {{ " + "[{0}'=0, i', j'] -> [{0}=1, i, j] : " "0 <= i' < pi and 0 <= j' < pj and i > i' and 0 <= i < pi and 0 <= j < pj; " - "[_lp_linchk_statement'=0, i', j'] -> [_lp_linchk_statement=1, i=i', j]:" + "[{0}'=0, i', j'] -> [{0}=1, i=i', j] : " "0 <= i' < pi and 0 <= j' < pj and j > j' and 0 <= j < pj; " - "[_lp_linchk_statement'=0, i', j'] -> [_lp_linchk_statement=1, i=i', j=j']:" + "[{0}'=0, i', j'] -> [{0}=1, i=i', j=j'] : " "0 <= i' < pi and 0 <= j' < pj " - "}" + "}}".format(STATEMENT_VAR_NAME) ) # isl ignores these apostrophes, so explicitly add them expected_sio = append_marker_to_isl_map_var_names( @@ -601,10 +587,10 @@ def check_sio_for_insn_pair( # Relationship between insn_b and insn_d --------------------------------------- expected_sio = isl.Map( - "[pt, pi, pj] -> { " - "[_lp_linchk_statement'=0, i', j'] -> [_lp_linchk_statement=1, t]:" + "[pt, pi, pj] -> {{ " + "[{0}'=0, i', j'] -> [{0}=1, t] : " "0 <= i' < pi and 0 <= j' < pj and 0 <= t < pt " - "}" + "}}".format(STATEMENT_VAR_NAME) ) # isl ignores these apostrophes, so explicitly add them expected_sio = append_marker_to_isl_map_var_names( @@ -616,10 +602,10 @@ def check_sio_for_insn_pair( # Relationship between insn_c and insn_d --------------------------------------- expected_sio = isl.Map( - "[pt, pi, pj] -> { " - "[_lp_linchk_statement'=0, i', j'] -> [_lp_linchk_statement=1, t]:" + "[pt, pi, pj] -> {{ " + "[{0}'=0, i', j'] -> [{0}=1, t] : " "0 <= i' < pi and 0 <= j' < pj and 0 <= t < pt " - "}" + "}}".format(STATEMENT_VAR_NAME) ) # isl ignores these apostrophes, so explicitly add them expected_sio = 
append_marker_to_isl_map_var_names( From b2c589713f7cc152650b5b3dec50bdfa9fb59b9d Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Fri, 10 Jul 2020 08:28:25 -0500 Subject: [PATCH 099/460] remove StatementRef and change StatementInstanceSet.stmt_ref to just StatementInstanceSet.insn_id --- loopy/schedule/checker/schedule.py | 116 +++++++++-------------------- 1 file changed, 37 insertions(+), 79 deletions(-) diff --git a/loopy/schedule/checker/schedule.py b/loopy/schedule/checker/schedule.py index 00a9ead51..db35ec124 100644 --- a/loopy/schedule/checker/schedule.py +++ b/loopy/schedule/checker/schedule.py @@ -47,55 +47,6 @@ STATEMENT_VAR_NAME = "%sstatement" % (LIN_CHECK_IDENTIFIER_PREFIX) -class StatementRef(object): - """A reference to a :mod:`loopy` statement. - - .. attribute:: insn_id - - A :class:`str` specifying the :mod:`loopy` instruction id - for this statement. - - .. attribute:: int_id - - A :class:`int` uniquely identifying the statement within a - :class:`PairwiseScheduleBuilder`. A :class:`PairwiseScheduleBuilder` - builds a mapping from points in a space of statement instances to - points in a lexicographic ordering. The `statement` dimension of a - point in the statement instance space representing an instance of - this statement is assigned this value. - - """ - - def __init__( - self, - insn_id, - int_id=None, - ): - self.insn_id = insn_id - self.int_id = int_id - - def __eq__(self, other): - return ( - self.insn_id == other.insn_id - and self.int_id == other.int_id - ) - - def update_persistent_hash(self, key_hash, key_builder): - """Custom hash computation function for use with - :class:`pytools.persistent_dict.PersistentDict`. 
- """ - - key_builder.rec(key_hash, self.insn_id) - key_builder.rec(key_hash, self.int_id) - - def __str__(self): - if self.int_id is not None: - int_id = ":%d" % (self.int_id) - else: - int_id = "" - return "%s(%s%s)" % (self.__class__.__name__, self.insn_id, int_id) - - class StatementInstanceSet(object): """A representation of a set of (non-concurrent) instances of a statement being executed. The ordering of the instances is described @@ -103,9 +54,10 @@ class StatementInstanceSet(object): lexicographic ordering of statements. Each field in the list corresponds to a dimension in the lexicographic ordering. - .. attribute:: stmt_ref + .. attribute:: insn_id - A :class:`StatementRef`. + A :class:`str` instruction identifier that is unique within + a :class:`loopy.kernel.LoopKernel`. .. attribute:: lex_points @@ -116,15 +68,15 @@ class StatementInstanceSet(object): def __init__( self, - stmt_ref, + insn_id, lex_points, ): - self.stmt_ref = stmt_ref + self.insn_id = insn_id self.lex_points = lex_points def __repr__(self): return "%s(%s, %s)" % ( - self.__class__.__name__, self.stmt_ref, self.lex_points) + self.__class__.__name__, self.insn_id, self.lex_points) class PairwiseScheduleBuilder(object): @@ -177,12 +129,6 @@ def __init__( self.stmt_instance_before = None self.stmt_instance_after = None - # Determine integer IDs that will represent each statement in mapping - # (dependency map creation assumes sid_before=0 and sid_after=1, unless - # before and after refer to same stmt, in which case sid_before=sid_after=0) - int_sid_before = 0 - int_sid_after = 0 if before_insn_id == after_insn_id else 1 - # TODO when/after dependencies are added, consider the possibility # of removing the two-statements-per-PairwiseScheduleBuilder limitation @@ -254,20 +200,14 @@ def __init__( if lp_insn_id == before_insn_id: # add before sched item self.stmt_instance_before = StatementInstanceSet( - StatementRef( - insn_id=lp_insn_id, - int_id=int_sid_before, # int representing insn - 
), + lp_insn_id, next_insn_lex_tuple[:]) stmt_added = True if lp_insn_id == after_insn_id: # add after sched item self.stmt_instance_after = StatementInstanceSet( - StatementRef( - insn_id=lp_insn_id, - int_id=int_sid_after, # int representing insn - ), + lp_insn_id, next_insn_lex_tuple[:]) stmt_added = True @@ -300,13 +240,13 @@ def max_lex_dims(self): def pad_lex_tuples_with_zeros(self): """Find the maximum number of lexicographic dimensions represented in the lexicographic ordering, and if any - :class:`StatementRef` maps to a lex point tuple with + :class:`StatementInstanceSet` maps to a lex point tuple with fewer dimensions, add a zero for each of the missing dimensions. """ def _pad_lex_tuple_with_zeros(stmt_inst, length): return StatementInstanceSet( - stmt_inst.stmt_ref, + stmt_inst.insn_id, stmt_inst.lex_points[:] + [0]*(length-len(stmt_inst.lex_points)), ) @@ -347,11 +287,11 @@ def build_maps( params_sched = [] out_names_sched = self.get_lex_var_names() - def _get_map_for_stmt_inst(stmt_inst): + def _get_map_for_stmt_inst(stmt_inst, int_sid): # Get inames domain for statement instance (a BasicSet) dom = knl.get_inames_domain( - knl.id_to_insn[stmt_inst.stmt_ref.insn_id].within_inames) + knl.id_to_insn[stmt_inst.insn_id].within_inames) # create space (an isl space in current implementation) # {('statement', used in statement domain>) -> @@ -373,7 +313,7 @@ def _get_map_for_stmt_inst(stmt_inst): # Right now, statement instance tuples consist of single int. # Add all inames from domains to each map domain tuple. 
tuple_pair = [( - (stmt_inst.stmt_ref.int_id, ) + tuple(dom_inames_ordered), + (int_sid, ) + tuple(dom_inames_ordered), stmt_inst.lex_points )] @@ -383,8 +323,18 @@ def _get_map_for_stmt_inst(stmt_inst): space=sched_space, ) - map_before = _get_map_for_stmt_inst(self.stmt_instance_before) - map_after = _get_map_for_stmt_inst(self.stmt_instance_after) + # Determine integer IDs that will represent each statement in mapping + # (dependency map creation assumes sid_before=0 and sid_after=1, unless + # before and after refer to same stmt, in which case sid_before=sid_after=0) + int_sid_before = 0 + int_sid_after = 0 if ( + self.stmt_instance_before.insn_id == self.stmt_instance_after.insn_id + ) else 1 + + map_before = _get_map_for_stmt_inst( + self.stmt_instance_before, int_sid_before) + map_after = _get_map_for_stmt_inst( + self.stmt_instance_after, int_sid_after) return (map_before, map_after) @@ -393,13 +343,21 @@ def get_lex_var_names(self): def __str__(self): - def stringify_sched_stmt_instance(stmt_inst): + def stringify_sched_stmt_instance(stmt_inst, int_sid): return "{\n[%s=%s,] -> %s;\n}" % ( STATEMENT_VAR_NAME, - stmt_inst.stmt_ref.int_id, + int_sid, stmt_inst.lex_points) + # TODO once we change class -> funcs, this repetition of logic will disappear + int_sid_before = 0 + int_sid_after = 0 if ( + self.stmt_instance_before.insn_id == self.stmt_instance_after.insn_id + ) else 1 + return "%s(\nBefore: %s\nAfter: %s\n)" % ( self.__class__.__name__, - stringify_sched_stmt_instance(self.stmt_instance_before), - stringify_sched_stmt_instance(self.stmt_instance_after)) + stringify_sched_stmt_instance( + self.stmt_instance_before, int_sid_before), + stringify_sched_stmt_instance( + self.stmt_instance_after, int_sid_after)) From b4e25dde21b214f5d5fa22c54fe8be1b0ad1664f Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Fri, 10 Jul 2020 08:30:22 -0500 Subject: [PATCH 100/460] rename PairwiseScheduleBuilder.statement_instance_before/after to 
PairwiseScheduleBuilder.statement_instance_set_before/after (since they're not just a single instance) --- loopy/schedule/checker/schedule.py | 49 ++++++++++++++++-------------- test/test_linearization_checker.py | 28 +++++++++-------- 2 files changed, 42 insertions(+), 35 deletions(-) diff --git a/loopy/schedule/checker/schedule.py b/loopy/schedule/checker/schedule.py index db35ec124..04d1315e1 100644 --- a/loopy/schedule/checker/schedule.py +++ b/loopy/schedule/checker/schedule.py @@ -87,19 +87,19 @@ class PairwiseScheduleBuilder(object): :func:`loopy.schedule.checker.get_schedule_for_statement_pair` is the preferred method of creating a PairwiseScheduleBuilder. - .. attribute:: stmt_instance_before + .. attribute:: stmt_instance_set_before A :class:`StatementInstanceSet` whose ordering relative - to `stmt_instance_after is described by PairwiseScheduleBuilder. This + to `stmt_instance_set_after is described by PairwiseScheduleBuilder. This is achieved by mapping the statement instances in both sets to points in a single lexicographic ordering. Points in lexicographic ordering are represented as a list of :class:`int` or as :class:`str` :mod:`loopy` inames. - .. attribute:: stmt_instance_after + .. attribute:: stmt_instance_set_after A :class:`StatementInstanceSet` whose ordering relative - to `stmt_instance_before is described by PairwiseScheduleBuilder. This + to `stmt_instance_set_before is described by PairwiseScheduleBuilder. This is achieved by mapping the statement instances in both sets to points in a single lexicographic ordering. Points in lexicographic ordering are represented as a list of :class:`int` or as :class:`str` @@ -118,7 +118,7 @@ def __init__( order will be described by this :class:`PairwiseScheduleBuilder`. :arg before_insn_id: A :class:`str` instruction id specifying - stmt_instance_before in this pair of instructions. + stmt_instance_set_before in this pair of instructions. 
:arg after_insn_id: A :class:`str` instruction id specifying stmt_instancce_after in this pair of instructions. @@ -126,8 +126,8 @@ def __init__( """ # PairwiseScheduleBuilder statements - self.stmt_instance_before = None - self.stmt_instance_after = None + self.stmt_instance_set_before = None + self.stmt_instance_set_after = None # TODO when/after dependencies are added, consider the possibility # of removing the two-statements-per-PairwiseScheduleBuilder limitation @@ -199,14 +199,14 @@ def __init__( if lp_insn_id == before_insn_id: # add before sched item - self.stmt_instance_before = StatementInstanceSet( + self.stmt_instance_set_before = StatementInstanceSet( lp_insn_id, next_insn_lex_tuple[:]) stmt_added = True if lp_insn_id == after_insn_id: # add after sched item - self.stmt_instance_after = StatementInstanceSet( + self.stmt_instance_set_after = StatementInstanceSet( lp_insn_id, next_insn_lex_tuple[:]) stmt_added = True @@ -224,7 +224,7 @@ def __init__( else: pass # to save time, stop when we've created both statements - if self.stmt_instance_before and self.stmt_instance_after: + if self.stmt_instance_set_before and self.stmt_instance_set_after: break # At this point, pairwise sub-schedule may contain lex point tuples @@ -234,8 +234,8 @@ def __init__( def max_lex_dims(self): return max([ - len(self.stmt_instance_before.lex_points), - len(self.stmt_instance_after.lex_points)]) + len(self.stmt_instance_set_before.lex_points), + len(self.stmt_instance_set_after.lex_points)]) def pad_lex_tuples_with_zeros(self): """Find the maximum number of lexicographic dimensions represented @@ -252,10 +252,10 @@ def _pad_lex_tuple_with_zeros(stmt_inst, length): max_lex_dim = self.max_lex_dims() - self.stmt_instance_before = _pad_lex_tuple_with_zeros( - self.stmt_instance_before, max_lex_dim) - self.stmt_instance_after = _pad_lex_tuple_with_zeros( - self.stmt_instance_after, max_lex_dim) + self.stmt_instance_set_before = _pad_lex_tuple_with_zeros( + 
self.stmt_instance_set_before, max_lex_dim) + self.stmt_instance_set_after = _pad_lex_tuple_with_zeros( + self.stmt_instance_set_after, max_lex_dim) def build_maps( self, @@ -263,7 +263,8 @@ def build_maps( ): r"""Create a pair of :class:`islpy.Map`\ s representing a pairwise schedule as two mappings from statement instances to lexicographic time, - one for ``stmt_instance_before`` and one for ``stmt_instance_after``. + one for ``stmt_instance_set_before`` and one for + ``stmt_instance_set_after``. :arg knl: A :class:`loopy.kernel.LoopKernel` containing the linearization items that are described by the schedule. This @@ -328,13 +329,14 @@ def _get_map_for_stmt_inst(stmt_inst, int_sid): # before and after refer to same stmt, in which case sid_before=sid_after=0) int_sid_before = 0 int_sid_after = 0 if ( - self.stmt_instance_before.insn_id == self.stmt_instance_after.insn_id + self.stmt_instance_set_before.insn_id == + self.stmt_instance_set_after.insn_id ) else 1 map_before = _get_map_for_stmt_inst( - self.stmt_instance_before, int_sid_before) + self.stmt_instance_set_before, int_sid_before) map_after = _get_map_for_stmt_inst( - self.stmt_instance_after, int_sid_after) + self.stmt_instance_set_after, int_sid_after) return (map_before, map_after) @@ -352,12 +354,13 @@ def stringify_sched_stmt_instance(stmt_inst, int_sid): # TODO once we change class -> funcs, this repetition of logic will disappear int_sid_before = 0 int_sid_after = 0 if ( - self.stmt_instance_before.insn_id == self.stmt_instance_after.insn_id + self.stmt_instance_set_before.insn_id == + self.stmt_instance_set_after.insn_id ) else 1 return "%s(\nBefore: %s\nAfter: %s\n)" % ( self.__class__.__name__, stringify_sched_stmt_instance( - self.stmt_instance_before, int_sid_before), + self.stmt_instance_set_before, int_sid_before), stringify_sched_stmt_instance( - self.stmt_instance_after, int_sid_after)) + self.stmt_instance_set_after, int_sid_after)) diff --git a/test/test_linearization_checker.py 
b/test/test_linearization_checker.py index d3042b36c..c87bb149b 100644 --- a/test/test_linearization_checker.py +++ b/test/test_linearization_checker.py @@ -152,8 +152,8 @@ def _lex_space_string(dim_vals): # Relationship between insn_a and insn_b --------------------------------------- - assert sched_ab.stmt_instance_before.lex_points == [0, 'i', 0, 'k', 0] - assert sched_ab.stmt_instance_after.lex_points == [0, 'i', 1, 'j', 0] + assert sched_ab.stmt_instance_set_before.lex_points == [0, 'i', 0, 'k', 0] + assert sched_ab.stmt_instance_set_after.lex_points == [0, 'i', 1, 'j', 0] # Get two maps from the PairwiseScheduleBuilder @@ -187,8 +187,8 @@ def _lex_space_string(dim_vals): # ------------------------------------------------------------------------------ # Relationship between insn_a and insn_c --------------------------------------- - assert sched_ac.stmt_instance_before.lex_points == [0, 'i', 0, 'k', 0] - assert sched_ac.stmt_instance_after.lex_points == [0, 'i', 1, 'j', 0] + assert sched_ac.stmt_instance_set_before.lex_points == [0, 'i', 0, 'k', 0] + assert sched_ac.stmt_instance_set_after.lex_points == [0, 'i', 1, 'j', 0] # Get two maps from the PairwiseScheduleBuilder @@ -225,9 +225,10 @@ def _lex_space_string(dim_vals): # insn_a and insn_d could have been linearized in either order # (i loop could be before or after t loop) def perform_insn_ad_checks_with(a_lex_idx, d_lex_idx): - assert sched_ad.stmt_instance_before.lex_points == [ + assert sched_ad.stmt_instance_set_before.lex_points == [ a_lex_idx, 'i', 0, 'k', 0] - assert sched_ad.stmt_instance_after.lex_points == [d_lex_idx, 't', 0, 0, 0] + assert sched_ad.stmt_instance_set_after.lex_points == [ + d_lex_idx, 't', 0, 0, 0] # Get two maps from the PairwiseScheduleBuilder @@ -271,9 +272,10 @@ def perform_insn_ad_checks_with(a_lex_idx, d_lex_idx): # insn_b and insn_c could have been linearized in either order # (i loop could be before or after t loop) def perform_insn_bc_checks_with(b_lex_idx, c_lex_idx): - 
assert sched_bc.stmt_instance_before.lex_points == [ + assert sched_bc.stmt_instance_set_before.lex_points == [ 0, 'i', 0, 'j', b_lex_idx] - assert sched_bc.stmt_instance_after.lex_points == [0, 'i', 0, 'j', c_lex_idx] + assert sched_bc.stmt_instance_set_after.lex_points == [ + 0, 'i', 0, 'j', c_lex_idx] # Get two maps from the PairwiseScheduleBuilder @@ -317,9 +319,10 @@ def perform_insn_bc_checks_with(b_lex_idx, c_lex_idx): # insn_b and insn_d could have been linearized in either order # (i loop could be before or after t loop) def perform_insn_bd_checks_with(b_lex_idx, d_lex_idx): - assert sched_bd.stmt_instance_before.lex_points == [ + assert sched_bd.stmt_instance_set_before.lex_points == [ b_lex_idx, 'i', 0, 'j', 0] - assert sched_bd.stmt_instance_after.lex_points == [d_lex_idx, 't', 0, 0, 0] + assert sched_bd.stmt_instance_set_after.lex_points == [ + d_lex_idx, 't', 0, 0, 0] # Get two maps from the PairwiseScheduleBuilder @@ -363,9 +366,10 @@ def perform_insn_bd_checks_with(b_lex_idx, d_lex_idx): # insn_c and insn_d could have been linearized in either order # (i loop could be before or after t loop) def perform_insn_cd_checks_with(c_lex_idx, d_lex_idx): - assert sched_cd.stmt_instance_before.lex_points == [ + assert sched_cd.stmt_instance_set_before.lex_points == [ c_lex_idx, 'i', 0, 'j', 0] - assert sched_cd.stmt_instance_after.lex_points == [d_lex_idx, 't', 0, 0, 0] + assert sched_cd.stmt_instance_set_after.lex_points == [ + d_lex_idx, 't', 0, 0, 0] # Get two maps from the PairwiseScheduleBuilder From bb18e5823f422ce1b034389c52119fd3a4efd5f7 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Mon, 13 Jul 2020 02:47:33 -0500 Subject: [PATCH 101/460] remove pad_lex_tuples_with_zeros() method since it's only ever called once (in-line its functionality) --- loopy/schedule/checker/schedule.py | 18 +++++------------- 1 file changed, 5 insertions(+), 13 deletions(-) diff --git a/loopy/schedule/checker/schedule.py b/loopy/schedule/checker/schedule.py index 
04d1315e1..818d8355c 100644 --- a/loopy/schedule/checker/schedule.py +++ b/loopy/schedule/checker/schedule.py @@ -230,19 +230,6 @@ def __init__( # At this point, pairwise sub-schedule may contain lex point tuples # missing dimensions; the values in these missing dims should # be zero, so add them. - self.pad_lex_tuples_with_zeros() - - def max_lex_dims(self): - return max([ - len(self.stmt_instance_set_before.lex_points), - len(self.stmt_instance_set_after.lex_points)]) - - def pad_lex_tuples_with_zeros(self): - """Find the maximum number of lexicographic dimensions represented - in the lexicographic ordering, and if any - :class:`StatementInstanceSet` maps to a lex point tuple with - fewer dimensions, add a zero for each of the missing dimensions. - """ def _pad_lex_tuple_with_zeros(stmt_inst, length): return StatementInstanceSet( @@ -257,6 +244,11 @@ def _pad_lex_tuple_with_zeros(stmt_inst, length): self.stmt_instance_set_after = _pad_lex_tuple_with_zeros( self.stmt_instance_set_after, max_lex_dim) + def max_lex_dims(self): + return max([ + len(self.stmt_instance_set_before.lex_points), + len(self.stmt_instance_set_after.lex_points)]) + def build_maps( self, knl, From 2f0c95cec3659c3e154e5fee60b664a21ef7f77e Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Mon, 13 Jul 2020 03:25:15 -0500 Subject: [PATCH 102/460] remove get_isl_space() and set_all_isl_space_names(), instead use isl.Space.create_from_names() --- loopy/schedule/checker/schedule.py | 6 ++--- loopy/schedule/checker/utils.py | 42 ------------------------------ 2 files changed, 3 insertions(+), 45 deletions(-) diff --git a/loopy/schedule/checker/schedule.py b/loopy/schedule/checker/schedule.py index 818d8355c..f085547c6 100644 --- a/loopy/schedule/checker/schedule.py +++ b/loopy/schedule/checker/schedule.py @@ -272,7 +272,6 @@ def build_maps( from loopy.schedule.checker.utils import ( list_var_names_in_isl_sets, - get_isl_space, create_symbolic_map_from_tuples, add_dims_to_isl_set, ) @@ -292,8 +291,9 @@ 
def _get_map_for_stmt_inst(stmt_inst, int_sid): dom_inames_ordered = list_var_names_in_isl_sets([dom]) in_names_sched = [STATEMENT_VAR_NAME] + dom_inames_ordered[:] - sched_space = get_isl_space( - params_sched, in_names_sched, out_names_sched) + sched_space = isl.Space.create_from_names( + isl.DEFAULT_CONTEXT, + in_=in_names_sched, out=out_names_sched, params=params_sched) # Insert 'statement' dim into domain so that its space allows # for intersection with sched map later diff --git a/loopy/schedule/checker/utils.py b/loopy/schedule/checker/utils.py index e862d166e..f336d21f7 100644 --- a/loopy/schedule/checker/utils.py +++ b/loopy/schedule/checker/utils.py @@ -237,48 +237,6 @@ def create_symbolic_map_from_tuples( return _get_union(all_maps) -def set_all_isl_space_names( - isl_space, param_names=None, in_names=None, out_names=None): - """Return a copy of `isl_space` with the specified dimension names. - If no names are provided, use `p0, p1, ...` for parameters, - `i0, i1, ...`, for in_ dimensions, and `o0, o1, ...` for out - dimensions. 
- - """ - - new_space = isl_space.copy() - dim_type = isl.dim_type - if param_names: - for i, p in enumerate(param_names): - new_space = new_space.set_dim_name(dim_type.param, i, p) - else: - for i in range(len(isl_space.get_var_names(dim_type.param))): - new_space = new_space.set_dim_name(dim_type.param, i, "p%d" % (i)) - if in_names: - for i, p in enumerate(in_names): - new_space = new_space.set_dim_name(dim_type.in_, i, p) - else: - for i in range(len(isl_space.get_var_names(dim_type.in_))): - new_space = new_space.set_dim_name(dim_type.in_, i, "i%d" % (i)) - if out_names: - for i, p in enumerate(out_names): - new_space = new_space.set_dim_name(dim_type.out, i, p) - else: - for i in range(len(isl_space.get_var_names(dim_type.out))): - new_space = new_space.set_dim_name(dim_type.out, i, "o%d" % (i)) - return new_space - - -def get_isl_space(param_names, in_names, out_names): - """Return an :class:`islpy.Space` with the specified dimension names. - """ - - space = isl.Space.alloc( - isl.DEFAULT_CONTEXT, len(param_names), len(in_names), len(out_names)) - return set_all_isl_space_names( - space, param_names=param_names, in_names=in_names, out_names=out_names) - - def get_concurrent_inames(knl): from loopy.kernel.data import ConcurrentTag conc_inames = set() From d8587b0f824e45b1640c2747a332100dd9ab469b Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Mon, 13 Jul 2020 04:12:49 -0500 Subject: [PATCH 103/460] perform union of maps upon creation in create_symbolic_map_from_tuples() rather than afterward; temporarily leave old version in place to test for equality --- loopy/schedule/checker/utils.py | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/loopy/schedule/checker/utils.py b/loopy/schedule/checker/utils.py index f336d21f7..3a7688e4d 100644 --- a/loopy/schedule/checker/utils.py +++ b/loopy/schedule/checker/utils.py @@ -187,7 +187,14 @@ def create_symbolic_map_from_tuples( # loop through pairs and create a set that will later be 
converted to a map + # TODO remove after testing: all_maps = [] + + # initialize union to empty + union_of_maps = isl.Map.from_domain( + islvars[0].eq_set(islvars[0]+1) # 0 == 1 (false) + ).move_dims( + dim_type.out, 0, dim_type.in_, len(space_in_names), len(space_out_names)) for (tup_in, tup_out), dom in tuple_pairs_with_domains: # initialize constraint with true @@ -231,10 +238,17 @@ def create_symbolic_map_from_tuples( ) # intersect domain with this map + union_of_maps = union_of_maps.union( + map_from_set.intersect_domain(dom_with_all_inames)) + + # TODO remove after testing: all_maps.append( map_from_set.intersect_domain(dom_with_all_inames)) - return _get_union(all_maps) + # TODO remove after testing: + assert union_of_maps == _get_union(all_maps) + + return union_of_maps def get_concurrent_inames(knl): From c465bdb4cecfe1f700bede85680b06d51734b634 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Mon, 13 Jul 2020 04:15:58 -0500 Subject: [PATCH 104/460] remove utils._get_union() --- loopy/schedule/checker/utils.py | 17 ----------------- 1 file changed, 17 deletions(-) diff --git a/loopy/schedule/checker/utils.py b/loopy/schedule/checker/utils.py index 3a7688e4d..cef985ee6 100644 --- a/loopy/schedule/checker/utils.py +++ b/loopy/schedule/checker/utils.py @@ -132,13 +132,6 @@ def ensure_dim_names_match_and_align(obj_map, tgt_map): return aligned_obj_map -def _get_union(list_items): - union = list_items[0] - for s in list_items[1:]: - union = union.union(s) - return union - - def list_var_names_in_isl_sets( isl_sets, set_dim=isl.dim_type.set): @@ -187,9 +180,6 @@ def create_symbolic_map_from_tuples( # loop through pairs and create a set that will later be converted to a map - # TODO remove after testing: - all_maps = [] - # initialize union to empty union_of_maps = isl.Map.from_domain( islvars[0].eq_set(islvars[0]+1) # 0 == 1 (false) @@ -241,13 +231,6 @@ def create_symbolic_map_from_tuples( union_of_maps = union_of_maps.union( 
map_from_set.intersect_domain(dom_with_all_inames)) - # TODO remove after testing: - all_maps.append( - map_from_set.intersect_domain(dom_with_all_inames)) - - # TODO remove after testing: - assert union_of_maps == _get_union(all_maps) - return union_of_maps From 138702bda0750e4693b946ccf31b6e0fa06b23fa Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Mon, 13 Jul 2020 11:45:25 -0500 Subject: [PATCH 105/460] (commented out code that compares result from isl.affs_from_space() to result from isl.make_zero_and_vars()) --- loopy/schedule/checker/utils.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/loopy/schedule/checker/utils.py b/loopy/schedule/checker/utils.py index cef985ee6..0bfdf0a54 100644 --- a/loopy/schedule/checker/utils.py +++ b/loopy/schedule/checker/utils.py @@ -29,10 +29,15 @@ def prettier_map_string(map_obj): def get_islvars_from_space(space): + #pu.db param_names = space.get_var_names(isl.dim_type.param) in_names = space.get_var_names(isl.dim_type.in_) out_names = space.get_var_names(isl.dim_type.out) return isl.make_zero_and_vars(in_names+out_names, param_names) + #old = isl.make_zero_and_vars(in_names+out_names, param_names) + #new = isl.affs_from_space(space) + #assert old == new + #return new def add_dims_to_isl_set(isl_set, dim_type, names, new_idx_start): From 6f7e2a3168f05d6def4925fa1bac13c9032d51bf Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Tue, 14 Jul 2020 08:28:42 -0500 Subject: [PATCH 106/460] removed PairwiseScheduleBuilder class; removed PSB methods max_lex_dims() and build_maps() (and __str__() but no one cares); combined removed functions/class into single generate_pairwise_schedule() func that returns a pair of maps (one for each statement) so that intermediate state (what used to be PSB) is no longer kept around --- loopy/schedule/checker/__init__.py | 43 +-- loopy/schedule/checker/schedule.py | 508 +++++++++++++---------------- test/test_linearization_checker.py | 124 +++---- 3 files changed, 299 insertions(+), 376 
deletions(-) diff --git a/loopy/schedule/checker/__init__.py b/loopy/schedule/checker/__init__.py index 716a0cb58..a2963f689 100644 --- a/loopy/schedule/checker/__init__.py +++ b/loopy/schedule/checker/__init__.py @@ -21,7 +21,7 @@ """ -# {{{ create PairwiseScheduleBuilder for statement pair +# {{{ create a pairwise schedule for statement pair def get_schedule_for_statement_pair( knl, @@ -29,9 +29,11 @@ def get_schedule_for_statement_pair( insn_id_before, insn_id_after, ): - """Create a :class:`loopy.schedule.checker.schedule.PairwiseScheduleBuilder` - representing the order of two statements as a mapping from - :class:`loopy.schedule.checker.StatementInstanceSet` + r"""Given a pair of statements in a linearized kernel, determine + the (relative) order in which the instances are executed, + by creating a mapping from statement instances to points in a single + lexicographic ordering. Create a pair of :class:`islpy.Map`\ s + representing a pairwise schedule as two mappings from statement instances to lexicographic time. :arg knl: A :class:`loopy.kernel.LoopKernel` containing the @@ -50,10 +52,10 @@ def get_schedule_for_statement_pair( :arg insn_id_after: An instruction identifier that is unique within a :class:`loopy.kernel.LoopKernel`. - :returns: A :class:`loopy.schedule.checker.schedule.PairwiseScheduleBuilder` - representing the order of two statements as a mapping from - :class:`loopy.schedule.checker.StatementInstanceSet` - to lexicographic time. + :returns: A two-tuple containing two :class:`islpy.Map`s + representing the a pairwise schedule as two mappings + from statement instances to lexicographic time, one for + each of the two statements. 
Example usage:: @@ -76,17 +78,16 @@ def get_schedule_for_statement_pair( from loopy.schedule.checker import ( get_schedule_for_statement_pair, ) - sched_builder_ab = get_schedule_for_statement_pair( + + # Get two maps ----------------------------------------------------------- + + sched_a, sched_b = get_schedule_for_statement_pair( knl, knl.linearization, "insn_a", "insn_b", ) - # Get two maps from the PairwiseScheduleBuilder -------------------------- - - sched_a, sched_b = sched_builder_ab.build_maps(knl) - print(sched_a) print(sched_b) @@ -112,12 +113,11 @@ def get_schedule_for_statement_pair( # }}} - # {{{ find any EnterLoop inames that are tagged as concurrent - - # so that PairwiseScheduleBuilder knows to ignore them + # {{{ Find any EnterLoop inames that are tagged as concurrent + # so that generate_pairwise_schedule() knows to ignore them # (In the future, this shouldn't be necessary because there - # won't be any inames with ConcurrentTags in EnterLoop linearization items. - # Test which exercises this: test_linearization_checker_with_stroud_bernstein()) + # won't be any inames with ConcurrentTags in EnterLoop linearization items. 
+ # Test which exercises this: test_linearization_checker_with_stroud_bernstein()) from loopy.schedule.checker.utils import ( get_concurrent_inames, get_EnterLoop_inames, @@ -134,11 +134,12 @@ def get_schedule_for_statement_pair( # }}} - # {{{ Create PairwiseScheduleBuilder: mapping of {statement instance: lex point} + # {{{ Create two mappings from {statement instance: lex point} # include only instructions involved in this dependency - from loopy.schedule.checker.schedule import PairwiseScheduleBuilder - return PairwiseScheduleBuilder( + from loopy.schedule.checker.schedule import generate_pairwise_schedule + return generate_pairwise_schedule( + preproc_knl, linearization_items, insn_id_before, insn_id_after, diff --git a/loopy/schedule/checker/schedule.py b/loopy/schedule/checker/schedule.py index f085547c6..9e693a9a1 100644 --- a/loopy/schedule/checker/schedule.py +++ b/loopy/schedule/checker/schedule.py @@ -79,280 +79,242 @@ def __repr__(self): self.__class__.__name__, self.insn_id, self.lex_points) -class PairwiseScheduleBuilder(object): - """Given a pair of statements in a linearized kernel, PairwiseScheduleBuilder - determines the (relative) order in which the instances are executed, +def generate_pairwise_schedule( + knl, + linearization_items_ordered, + before_insn_id, + after_insn_id, + loops_to_ignore=set(), + ): + r"""Given a pair of statements in a linearized kernel, determine + the (relative) order in which the instances are executed, by creating a mapping from statement instances to points in a single - lexicographic ordering. The function - :func:`loopy.schedule.checker.get_schedule_for_statement_pair` is the - preferred method of creating a PairwiseScheduleBuilder. - - .. attribute:: stmt_instance_set_before - - A :class:`StatementInstanceSet` whose ordering relative - to `stmt_instance_set_after is described by PairwiseScheduleBuilder. 
This - is achieved by mapping the statement instances in both sets to points - in a single lexicographic ordering. Points in lexicographic ordering - are represented as a list of :class:`int` or as :class:`str` - :mod:`loopy` inames. - - .. attribute:: stmt_instance_set_after - - A :class:`StatementInstanceSet` whose ordering relative - to `stmt_instance_set_before is described by PairwiseScheduleBuilder. This - is achieved by mapping the statement instances in both sets to points - in a single lexicographic ordering. Points in lexicographic ordering - are represented as a list of :class:`int` or as :class:`str` - :mod:`loopy` inames. + lexicographic ordering. Create a pair of :class:`islpy.Map`\ s + representing a pairwise schedule as two mappings from statement instances + to lexicographic time. + + :arg knl: A :class:`loopy.kernel.LoopKernel` containing the + linearization items that will be described by the schedule. This + kernel will be used to get the domains associated with the inames + used in the statements. + + :arg linearization_items_ordered: A list of :class:`loopy.schedule.ScheduleItem` + (to be renamed to `loopy.schedule.LinearizationItem`) containing the + two linearization items whose relative order will be described by the + schedule. This list may be a *partial* linearization for a kernel since + this function may be used during the linearization process. + + :arg before_insn_id: A :class:`str` instruction id specifying + stmt_instance_set_before in this pair of instructions. + + :arg after_insn_id: A :class:`str` instruction id specifying + stmt_instance_set_after in this pair of instructions. + + :returns: A two-tuple containing two :class:`islpy.Map`s + representing the a pairwise schedule as two mappings + from statement instances to lexicographic time, one for + each of the two statements. 
""" - def __init__( - self, - linearization_items_ordered, - before_insn_id, - after_insn_id, - loops_to_ignore=set(), - ): - """ - :arg linearization_items_ordered: A list of :class:`ScheduleItem` whose - order will be described by this :class:`PairwiseScheduleBuilder`. - - :arg before_insn_id: A :class:`str` instruction id specifying - stmt_instance_set_before in this pair of instructions. - - :arg after_insn_id: A :class:`str` instruction id specifying - stmt_instancce_after in this pair of instructions. - - """ - - # PairwiseScheduleBuilder statements - self.stmt_instance_set_before = None - self.stmt_instance_set_after = None - - # TODO when/after dependencies are added, consider the possibility - # of removing the two-statements-per-PairwiseScheduleBuilder limitation - - from loopy.schedule import (EnterLoop, LeaveLoop, Barrier, RunInstruction) - - # go through linearization_items_ordered and generate pairwise sub-schedule - - # keep track of the next tuple of points in our lexicographic - # ordering, initially this as a 1-d point with value 0 - next_insn_lex_tuple = [0] - stmt_added_since_prev_block_at_tier = [False] - for linearization_item in linearization_items_ordered: - if isinstance(linearization_item, EnterLoop): - iname = linearization_item.iname - if iname in loops_to_ignore: - continue - - # We could always increment next_insn_lex_tuple[-1] here since - # this new section of code comes after the previous section - # (statements since last opened/closed loop), but if we have - # not added any statements within the previous section yet, we - # don't have to (effectively ignoring that section of code). 
- if stmt_added_since_prev_block_at_tier[-1]: - next_insn_lex_tuple[-1] = next_insn_lex_tuple[-1]+1 - stmt_added_since_prev_block_at_tier[-1] = False - - # upon entering a loop, we enter a new (deeper) tier, - # add one lex dimension for the loop variable, - # add second lex dim to enumerate code blocks within new loop, and - # append a dim to stmt_added_since_prev_block_at_tier to represent - # new tier - next_insn_lex_tuple.append(iname) - next_insn_lex_tuple.append(0) - stmt_added_since_prev_block_at_tier.append(False) - elif isinstance(linearization_item, LeaveLoop): - if linearization_item.iname in loops_to_ignore: - continue - # upon leaving a loop, - # pop lex dimension for enumerating code blocks within this loop, and - # pop lex dimension for the loop variable, and + # Two StatementInstanceSets, one for each statement: + + """ + stmt_instance_set_before + + A :class:`StatementInstanceSet` whose ordering relative + to `stmt_instance_set_after is described by the schedule blueprint. This + is achieved by mapping the statement instances in both sets to points + in a single lexicographic ordering. Points in lexicographic ordering + are represented as a list of :class:`int` or as :class:`str` + :mod:`loopy` inames. + """ + stmt_instance_set_before = None + + """ + stmt_instance_set_after + + A :class:`StatementInstanceSet` whose ordering relative + to `stmt_instance_set_before is described by the schedule blueprint. This + is achieved by mapping the statement instances in both sets to points + in a single lexicographic ordering. Points in lexicographic ordering + are represented as a list of :class:`int` or as :class:`str` + :mod:`loopy` inames. 
+ """ + stmt_instance_set_after = None + + from loopy.schedule import (EnterLoop, LeaveLoop, Barrier, RunInstruction) + + # go through linearization_items_ordered and generate pairwise sub-schedule + + # keep track of the next tuple of points in our lexicographic + # ordering, initially this as a 1-d point with value 0 + next_insn_lex_tuple = [0] + stmt_added_since_prev_block_at_tier = [False] + max_lex_dim = 0 + for linearization_item in linearization_items_ordered: + if isinstance(linearization_item, EnterLoop): + iname = linearization_item.iname + if iname in loops_to_ignore: + continue + + # We could always increment next_insn_lex_tuple[-1] here since + # this new section of code comes after the previous section + # (statements since last opened/closed loop), but if we have + # not added any statements within the previous section yet, we + # don't have to (effectively ignoring that section of code). + if stmt_added_since_prev_block_at_tier[-1]: + next_insn_lex_tuple[-1] = next_insn_lex_tuple[-1]+1 + stmt_added_since_prev_block_at_tier[-1] = False + + # upon entering a loop, we enter a new (deeper) tier, + # add one lex dimension for the loop variable, + # add second lex dim to enumerate code blocks within new loop, and + # append a dim to stmt_added_since_prev_block_at_tier to represent + # new tier + next_insn_lex_tuple.append(iname) + next_insn_lex_tuple.append(0) + stmt_added_since_prev_block_at_tier.append(False) + elif isinstance(linearization_item, LeaveLoop): + if linearization_item.iname in loops_to_ignore: + continue + # upon leaving a loop, + # pop lex dimension for enumerating code blocks within this loop, and + # pop lex dimension for the loop variable, and + # increment lex dim val enumerating items in current code block + next_insn_lex_tuple.pop() + next_insn_lex_tuple.pop() + + # We could always increment next_insn_lex_tuple[-1] here since + # this new block of code comes after the previous block (all + # statements since last opened/closed 
loop), but if we have not + # added any statements within the previous section yet, we + # don't have to (effectively ignoring that section of code). + stmt_added_since_prev_block_at_tier.pop() + if stmt_added_since_prev_block_at_tier[-1]: + next_insn_lex_tuple[-1] = next_insn_lex_tuple[-1]+1 + stmt_added_since_prev_block_at_tier[-1] = False + elif isinstance(linearization_item, (RunInstruction, Barrier)): + from loopy.schedule.checker.utils import ( + get_insn_id_from_linearization_item, + ) + lp_insn_id = get_insn_id_from_linearization_item(linearization_item) + if lp_insn_id is None: + # TODO make sure it's okay to ignore barriers without id + # (because they'll never be part of a dependency?) + # matmul example has barrier that fails this assertion... + # assert linearization_item.originating_insn_id is not None + continue + + # only process before/after insns, otherwise ignore + stmt_added = False + + if lp_insn_id == before_insn_id: + # add before sched item + stmt_instance_set_before = StatementInstanceSet( + lp_insn_id, + next_insn_lex_tuple[:]) + stmt_added = True + + if lp_insn_id == after_insn_id: + # add after sched item + stmt_instance_set_after = StatementInstanceSet( + lp_insn_id, + next_insn_lex_tuple[:]) + stmt_added = True + + # Note: before/after may refer to same stmt, in which case + # both of the above conditionals execute + + if stmt_added: + + # track the max number of lex dims used + if len(next_insn_lex_tuple) > max_lex_dim: + max_lex_dim = len(next_insn_lex_tuple) + # increment lex dim val enumerating items in current code block - next_insn_lex_tuple.pop() - next_insn_lex_tuple.pop() - - # We could always increment next_insn_lex_tuple[-1] here since - # this new block of code comes after the previous block (all - # statements since last opened/closed loop), but if we have not - # added any statements within the previous section yet, we - # don't have to (effectively ignoring that section of code). 
- stmt_added_since_prev_block_at_tier.pop() - if stmt_added_since_prev_block_at_tier[-1]: - next_insn_lex_tuple[-1] = next_insn_lex_tuple[-1]+1 - stmt_added_since_prev_block_at_tier[-1] = False - elif isinstance(linearization_item, (RunInstruction, Barrier)): - from loopy.schedule.checker.utils import ( - get_insn_id_from_linearization_item, - ) - lp_insn_id = get_insn_id_from_linearization_item(linearization_item) - if lp_insn_id is None: - # TODO make sure it's okay to ignore barriers without id - # (because they'll never be part of a dependency?) - # matmul example has barrier that fails this assertion... - # assert linearization_item.originating_insn_id is not None - continue - - # only process before/after insns, otherwise ignore - stmt_added = False - - if lp_insn_id == before_insn_id: - # add before sched item - self.stmt_instance_set_before = StatementInstanceSet( - lp_insn_id, - next_insn_lex_tuple[:]) - stmt_added = True - - if lp_insn_id == after_insn_id: - # add after sched item - self.stmt_instance_set_after = StatementInstanceSet( - lp_insn_id, - next_insn_lex_tuple[:]) - stmt_added = True - - # Note: before/after may refer to same stmt, in which case - # both of the above conditionals execute - - if stmt_added: - # increment lex dim val enumerating items in current code block - next_insn_lex_tuple[-1] = next_insn_lex_tuple[-1] + 1 - - # all current (nested) blocks now contain a statement - stmt_added_since_prev_block_at_tier = [True]*len( - stmt_added_since_prev_block_at_tier) - else: - pass - # to save time, stop when we've created both statements - if self.stmt_instance_set_before and self.stmt_instance_set_after: - break - - # At this point, pairwise sub-schedule may contain lex point tuples - # missing dimensions; the values in these missing dims should - # be zero, so add them. 
- - def _pad_lex_tuple_with_zeros(stmt_inst, length): - return StatementInstanceSet( - stmt_inst.insn_id, - stmt_inst.lex_points[:] + [0]*(length-len(stmt_inst.lex_points)), - ) - - max_lex_dim = self.max_lex_dims() - - self.stmt_instance_set_before = _pad_lex_tuple_with_zeros( - self.stmt_instance_set_before, max_lex_dim) - self.stmt_instance_set_after = _pad_lex_tuple_with_zeros( - self.stmt_instance_set_after, max_lex_dim) - - def max_lex_dims(self): - return max([ - len(self.stmt_instance_set_before.lex_points), - len(self.stmt_instance_set_after.lex_points)]) - - def build_maps( - self, - knl, - ): - r"""Create a pair of :class:`islpy.Map`\ s representing a pairwise schedule - as two mappings from statement instances to lexicographic time, - one for ``stmt_instance_set_before`` and one for - ``stmt_instance_set_after``. - - :arg knl: A :class:`loopy.kernel.LoopKernel` containing the - linearization items that are described by the schedule. This - kernel will be used to get the domains associated with the inames - used in the statements. - - :returns: A two-tuple containing two :class:`islpy.Map`s - representing the a pairwise schedule as two mappings - from statement instances to lexicographic time, one for - each of the two :class:`StatementInstanceSet`s. 
- - """ - - from loopy.schedule.checker.utils import ( - list_var_names_in_isl_sets, - create_symbolic_map_from_tuples, - add_dims_to_isl_set, - ) - - params_sched = [] - out_names_sched = self.get_lex_var_names() - - def _get_map_for_stmt_inst(stmt_inst, int_sid): - - # Get inames domain for statement instance (a BasicSet) - dom = knl.get_inames_domain( - knl.id_to_insn[stmt_inst.insn_id].within_inames) - - # create space (an isl space in current implementation) - # {('statement', used in statement domain>) -> - # (lexicographic ordering dims)} - dom_inames_ordered = list_var_names_in_isl_sets([dom]) - - in_names_sched = [STATEMENT_VAR_NAME] + dom_inames_ordered[:] - sched_space = isl.Space.create_from_names( - isl.DEFAULT_CONTEXT, - in_=in_names_sched, out=out_names_sched, params=params_sched) - - # Insert 'statement' dim into domain so that its space allows - # for intersection with sched map later - dom_to_intersect = [ - add_dims_to_isl_set( - dom, isl.dim_type.set, [STATEMENT_VAR_NAME], 0), ] - - # Each map representing the schedule will map - # statement instances -> lex time. - # Right now, statement instance tuples consist of single int. - # Add all inames from domains to each map domain tuple. 
- tuple_pair = [( - (int_sid, ) + tuple(dom_inames_ordered), - stmt_inst.lex_points - )] - - # create map - return create_symbolic_map_from_tuples( - tuple_pairs_with_domains=zip(tuple_pair, dom_to_intersect), - space=sched_space, - ) - - # Determine integer IDs that will represent each statement in mapping - # (dependency map creation assumes sid_before=0 and sid_after=1, unless - # before and after refer to same stmt, in which case sid_before=sid_after=0) - int_sid_before = 0 - int_sid_after = 0 if ( - self.stmt_instance_set_before.insn_id == - self.stmt_instance_set_after.insn_id - ) else 1 - - map_before = _get_map_for_stmt_inst( - self.stmt_instance_set_before, int_sid_before) - map_after = _get_map_for_stmt_inst( - self.stmt_instance_set_after, int_sid_after) - - return (map_before, map_after) - - def get_lex_var_names(self): - return [LEX_VAR_PREFIX+str(i) for i in range(self.max_lex_dims())] - - def __str__(self): - - def stringify_sched_stmt_instance(stmt_inst, int_sid): - return "{\n[%s=%s,] -> %s;\n}" % ( - STATEMENT_VAR_NAME, - int_sid, - stmt_inst.lex_points) - - # TODO once we change class -> funcs, this repetition of logic will disappear - int_sid_before = 0 - int_sid_after = 0 if ( - self.stmt_instance_set_before.insn_id == - self.stmt_instance_set_after.insn_id - ) else 1 - - return "%s(\nBefore: %s\nAfter: %s\n)" % ( - self.__class__.__name__, - stringify_sched_stmt_instance( - self.stmt_instance_set_before, int_sid_before), - stringify_sched_stmt_instance( - self.stmt_instance_set_after, int_sid_after)) + next_insn_lex_tuple[-1] = next_insn_lex_tuple[-1] + 1 + + # all current (nested) blocks now contain a statement + stmt_added_since_prev_block_at_tier = [True]*len( + stmt_added_since_prev_block_at_tier) + else: + pass + # to save time, stop when we've created both statements + if stmt_instance_set_before and stmt_instance_set_after: + break + + # At this point, pairwise sub-schedule may contain lex point tuples + # missing dimensions; the values 
in these missing dims should + # be zero, so add them. + + def _pad_lex_tuple_with_zeros(stmt_inst, length): + return StatementInstanceSet( + stmt_inst.insn_id, + stmt_inst.lex_points[:] + [0]*(length-len(stmt_inst.lex_points)), + ) + + stmt_instance_set_before = _pad_lex_tuple_with_zeros( + stmt_instance_set_before, max_lex_dim) + stmt_instance_set_after = _pad_lex_tuple_with_zeros( + stmt_instance_set_after, max_lex_dim) + + # Now generate maps from the blueprint --------------------------------------- + + from loopy.schedule.checker.utils import ( + list_var_names_in_isl_sets, + create_symbolic_map_from_tuples, + add_dims_to_isl_set, + ) + + params_sched = [] + out_names_sched = [LEX_VAR_PREFIX+str(i) for i in range(max_lex_dim)] + + def _get_map_for_stmt_inst(stmt_inst, int_sid): + + # Get inames domain for statement instance (a BasicSet) + dom = knl.get_inames_domain( + knl.id_to_insn[stmt_inst.insn_id].within_inames) + + # create space (an isl space in current implementation) + # {('statement', used in statement domain>) -> + # (lexicographic ordering dims)} + dom_inames_ordered = list_var_names_in_isl_sets([dom]) + + in_names_sched = [STATEMENT_VAR_NAME] + dom_inames_ordered[:] + sched_space = isl.Space.create_from_names( + isl.DEFAULT_CONTEXT, + in_=in_names_sched, out=out_names_sched, params=params_sched) + + # Insert 'statement' dim into domain so that its space allows + # for intersection with sched map later + dom_to_intersect = [ + add_dims_to_isl_set( + dom, isl.dim_type.set, [STATEMENT_VAR_NAME], 0), ] + + # Each map representing the schedule will map + # statement instances -> lex time. + # Right now, statement instance tuples consist of single int. + # Add all inames from domains to each map domain tuple. 
+ tuple_pair = [( + (int_sid, ) + tuple(dom_inames_ordered), + stmt_inst.lex_points + )] + + # create map + return create_symbolic_map_from_tuples( + tuple_pairs_with_domains=zip(tuple_pair, dom_to_intersect), + space=sched_space, + ) + + # Determine integer IDs that will represent each statement in mapping + # (dependency map creation assumes sid_before=0 and sid_after=1, unless + # before and after refer to same stmt, in which case sid_before=sid_after=0) + int_sid_before = 0 + int_sid_after = 0 if ( + stmt_instance_set_before.insn_id == stmt_instance_set_after.insn_id + ) else 1 + + map_before = _get_map_for_stmt_inst(stmt_instance_set_before, int_sid_before) + map_after = _get_map_for_stmt_inst(stmt_instance_set_after, int_sid_after) + + return (map_before, map_after) diff --git a/test/test_linearization_checker.py b/test/test_linearization_checker.py index c87bb149b..3834f280a 100644 --- a/test/test_linearization_checker.py +++ b/test/test_linearization_checker.py @@ -96,44 +96,6 @@ def test_lexschedule_and_map_creation(): knl = get_one_linearized_kernel(knl) linearization_items = knl.linearization - # Create PairwiseScheduleBuilder: mapping of {statement instance: lex point} - sched_ab = get_schedule_for_statement_pair( - knl, - linearization_items, - "insn_a", - "insn_b", - ) - sched_ac = get_schedule_for_statement_pair( - knl, - linearization_items, - "insn_a", - "insn_c", - ) - sched_ad = get_schedule_for_statement_pair( - knl, - linearization_items, - "insn_a", - "insn_d", - ) - sched_bc = get_schedule_for_statement_pair( - knl, - linearization_items, - "insn_b", - "insn_c", - ) - sched_bd = get_schedule_for_statement_pair( - knl, - linearization_items, - "insn_b", - "insn_d", - ) - sched_cd = get_schedule_for_statement_pair( - knl, - linearization_items, - "insn_c", - "insn_d", - ) - # There are multiple potential linearization orders for this kernel, so when # performing our comparisons for schedule correctness, we need to know which # order loopy 
chose. @@ -152,12 +114,13 @@ def _lex_space_string(dim_vals): # Relationship between insn_a and insn_b --------------------------------------- - assert sched_ab.stmt_instance_set_before.lex_points == [0, 'i', 0, 'k', 0] - assert sched_ab.stmt_instance_set_after.lex_points == [0, 'i', 1, 'j', 0] - - # Get two maps from the PairwiseScheduleBuilder - - sched_map_before, sched_map_after = sched_ab.build_maps(knl) + # Get two maps + sched_map_before, sched_map_after = get_schedule_for_statement_pair( + knl, + linearization_items, + "insn_a", + "insn_b", + ) # Create expected maps, align, compare @@ -187,12 +150,13 @@ def _lex_space_string(dim_vals): # ------------------------------------------------------------------------------ # Relationship between insn_a and insn_c --------------------------------------- - assert sched_ac.stmt_instance_set_before.lex_points == [0, 'i', 0, 'k', 0] - assert sched_ac.stmt_instance_set_after.lex_points == [0, 'i', 1, 'j', 0] - - # Get two maps from the PairwiseScheduleBuilder - - sched_map_before, sched_map_after = sched_ac.build_maps(knl) + # Get two maps + sched_map_before, sched_map_after = get_schedule_for_statement_pair( + knl, + linearization_items, + "insn_a", + "insn_c", + ) # Create expected maps, align, compare @@ -225,14 +189,13 @@ def _lex_space_string(dim_vals): # insn_a and insn_d could have been linearized in either order # (i loop could be before or after t loop) def perform_insn_ad_checks_with(a_lex_idx, d_lex_idx): - assert sched_ad.stmt_instance_set_before.lex_points == [ - a_lex_idx, 'i', 0, 'k', 0] - assert sched_ad.stmt_instance_set_after.lex_points == [ - d_lex_idx, 't', 0, 0, 0] - - # Get two maps from the PairwiseScheduleBuilder - - sched_map_before, sched_map_after = sched_ad.build_maps(knl) + # Get two maps + sched_map_before, sched_map_after = get_schedule_for_statement_pair( + knl, + linearization_items, + "insn_a", + "insn_d", + ) # Create expected maps, align, compare @@ -272,14 +235,13 @@ def 
perform_insn_ad_checks_with(a_lex_idx, d_lex_idx): # insn_b and insn_c could have been linearized in either order # (i loop could be before or after t loop) def perform_insn_bc_checks_with(b_lex_idx, c_lex_idx): - assert sched_bc.stmt_instance_set_before.lex_points == [ - 0, 'i', 0, 'j', b_lex_idx] - assert sched_bc.stmt_instance_set_after.lex_points == [ - 0, 'i', 0, 'j', c_lex_idx] - - # Get two maps from the PairwiseScheduleBuilder - - sched_map_before, sched_map_after = sched_bc.build_maps(knl) + # Get two maps + sched_map_before, sched_map_after = get_schedule_for_statement_pair( + knl, + linearization_items, + "insn_b", + "insn_c", + ) # Create expected maps, align, compare @@ -319,14 +281,13 @@ def perform_insn_bc_checks_with(b_lex_idx, c_lex_idx): # insn_b and insn_d could have been linearized in either order # (i loop could be before or after t loop) def perform_insn_bd_checks_with(b_lex_idx, d_lex_idx): - assert sched_bd.stmt_instance_set_before.lex_points == [ - b_lex_idx, 'i', 0, 'j', 0] - assert sched_bd.stmt_instance_set_after.lex_points == [ - d_lex_idx, 't', 0, 0, 0] - - # Get two maps from the PairwiseScheduleBuilder - - sched_map_before, sched_map_after = sched_bd.build_maps(knl) + # Get two maps + sched_map_before, sched_map_after = get_schedule_for_statement_pair( + knl, + linearization_items, + "insn_b", + "insn_d", + ) # Create expected maps, align, compare @@ -366,14 +327,13 @@ def perform_insn_bd_checks_with(b_lex_idx, d_lex_idx): # insn_c and insn_d could have been linearized in either order # (i loop could be before or after t loop) def perform_insn_cd_checks_with(c_lex_idx, d_lex_idx): - assert sched_cd.stmt_instance_set_before.lex_points == [ - c_lex_idx, 'i', 0, 'j', 0] - assert sched_cd.stmt_instance_set_after.lex_points == [ - d_lex_idx, 't', 0, 0, 0] - - # Get two maps from the PairwiseScheduleBuilder - - sched_map_before, sched_map_after = sched_cd.build_maps(knl) + # Get two maps + sched_map_before, sched_map_after = 
get_schedule_for_statement_pair( + knl, + linearization_items, + "insn_c", + "insn_d", + ) # Create expected maps, align, compare From 200eed41de56f90bec1a8c3f85d6a3ef9ddc05bc Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Tue, 14 Jul 2020 09:07:08 -0500 Subject: [PATCH 107/460] update tests after removeal of PairwiseScheduleBuilder class --- test/test_linearization_checker.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/test/test_linearization_checker.py b/test/test_linearization_checker.py index 82658bc01..9ad268edb 100644 --- a/test/test_linearization_checker.py +++ b/test/test_linearization_checker.py @@ -50,7 +50,7 @@ faulthandler.enable() -# {{{ test PairwiseScheduleBuilder and map creation +# {{{ test pairwise schedule map creation def test_pairwise_schedule_and_map_creation(): import islpy as isl @@ -379,6 +379,9 @@ def test_statement_instance_ordering_creation(): from loopy.schedule.checker import ( get_schedule_for_statement_pair, ) + from loopy.schedule.checker.schedule import ( + get_lex_order_map_for_sched_space, + ) from loopy.schedule.checker.utils import ( ensure_dim_names_match_and_align, append_marker_to_isl_map_var_names, @@ -431,18 +434,16 @@ def check_sio_for_insn_pair( expected_sio, ): - sched_builder = get_schedule_for_statement_pair( + # Get pairwise schedule + sched_map_before, sched_map_after = get_schedule_for_statement_pair( knl, linearization_items, insn_id_before, insn_id_after, ) - # Get two isl maps from the PairwiseScheduleBuilder - sched_map_before, sched_map_after = sched_builder.build_maps(knl) - # get map representing lexicographic ordering - sched_lex_order_map = sched_builder.get_lex_order_map_for_sched_space() + sched_lex_order_map = get_lex_order_map_for_sched_space(sched_map_before) assert sched_lex_order_map == expected_lex_order_map From cd1c1310b88d4f22157e6f9b5b79774f0e5f397f Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Tue, 14 Jul 2020 09:07:49 -0500 Subject: [PATCH 108/460] in 
create_lex_order_map(), make n_dims arg optional --- loopy/schedule/checker/lexicographic_order_map.py | 4 +++- loopy/schedule/checker/schedule.py | 3 +-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/loopy/schedule/checker/lexicographic_order_map.py b/loopy/schedule/checker/lexicographic_order_map.py index b547e1d94..0966cba99 100644 --- a/loopy/schedule/checker/lexicographic_order_map.py +++ b/loopy/schedule/checker/lexicographic_order_map.py @@ -128,7 +128,7 @@ def get_lex_order_constraint(before_names, after_names, islvars=None): def create_lex_order_map( - n_dims, + n_dims=None, before_names=None, after_names=None, ): @@ -166,6 +166,8 @@ def create_lex_order_map( append_marker_to_strings, ) before_names = append_marker_to_strings(after_names, marker="'") + if n_dims is None: + n_dims = len(after_names) assert len(before_names) == len(after_names) == n_dims dim_type = isl.dim_type diff --git a/loopy/schedule/checker/schedule.py b/loopy/schedule/checker/schedule.py index ad2ecefc6..a73c72cb2 100644 --- a/loopy/schedule/checker/schedule.py +++ b/loopy/schedule/checker/schedule.py @@ -341,5 +341,4 @@ def get_lex_order_map_for_sched_space(schedule): ) lex_dim_names = schedule.space.get_var_names(isl.dim_type.out) - return create_lex_order_map( - len(lex_dim_names), after_names=lex_dim_names) + return create_lex_order_map(after_names=lex_dim_names) From 87ac1b9e8c99f52e4a64f3db701159e907bc3764 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Tue, 14 Jul 2020 09:13:00 -0500 Subject: [PATCH 109/460] rename test_lexschedule_and_map_creation()->test_lexschedule_creation() --- test/test_linearization_checker.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/test_linearization_checker.py b/test/test_linearization_checker.py index 3834f280a..60abfade0 100644 --- a/test/test_linearization_checker.py +++ b/test/test_linearization_checker.py @@ -50,7 +50,7 @@ faulthandler.enable() -def test_lexschedule_and_map_creation(): +def 
test_lexschedule_creation(): import islpy as isl from loopy.schedule.checker import ( get_schedule_for_statement_pair, From 2251fa20eefb7f978c1a16131cefdeeb36c2586a Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Tue, 14 Jul 2020 10:30:02 -0500 Subject: [PATCH 110/460] remove StatementInstanceSet class, use an ImmutableRecord instead --- loopy/schedule/checker/schedule.py | 78 ++++++------------------------ 1 file changed, 14 insertions(+), 64 deletions(-) diff --git a/loopy/schedule/checker/schedule.py b/loopy/schedule/checker/schedule.py index 9e693a9a1..fc4938c98 100644 --- a/loopy/schedule/checker/schedule.py +++ b/loopy/schedule/checker/schedule.py @@ -47,38 +47,6 @@ STATEMENT_VAR_NAME = "%sstatement" % (LIN_CHECK_IDENTIFIER_PREFIX) -class StatementInstanceSet(object): - """A representation of a set of (non-concurrent) instances of a - statement being executed. The ordering of the instances is described - by the `lex_points` attribute, a list representing points in a - lexicographic ordering of statements. Each field in the list - corresponds to a dimension in the lexicographic ordering. - - .. attribute:: insn_id - - A :class:`str` instruction identifier that is unique within - a :class:`loopy.kernel.LoopKernel`. - - .. attribute:: lex_points - - A list containing one value for each dimension in a lexicographic - ordering. These values describe the ordering of the statements, - and may be :class:`str` :mod:`loopy` inames or :class:`int`. - """ - - def __init__( - self, - insn_id, - lex_points, - ): - self.insn_id = insn_id - self.lex_points = lex_points - - def __repr__(self): - return "%s(%s, %s)" % ( - self.__class__.__name__, self.insn_id, self.lex_points) - - def generate_pairwise_schedule( knl, linearization_items_ordered, @@ -116,33 +84,15 @@ def generate_pairwise_schedule( each of the two statements. 
""" - # Two StatementInstanceSets, one for each statement: - - """ - stmt_instance_set_before - - A :class:`StatementInstanceSet` whose ordering relative - to `stmt_instance_set_after is described by the schedule blueprint. This - is achieved by mapping the statement instances in both sets to points - in a single lexicographic ordering. Points in lexicographic ordering - are represented as a list of :class:`int` or as :class:`str` - :mod:`loopy` inames. - """ + # For each statement, create a :class:`ImmutableRecord` describing the set of + # statement instances. Contains the insn_id and a list representing points + # in the lexicographic ordering containing items of :class:`int` or + # :class:`str` :mod:`loopy` inames. stmt_instance_set_before = None - - """ - stmt_instance_set_after - - A :class:`StatementInstanceSet` whose ordering relative - to `stmt_instance_set_before is described by the schedule blueprint. This - is achieved by mapping the statement instances in both sets to points - in a single lexicographic ordering. Points in lexicographic ordering - are represented as a list of :class:`int` or as :class:`str` - :mod:`loopy` inames. 
- """ stmt_instance_set_after = None from loopy.schedule import (EnterLoop, LeaveLoop, Barrier, RunInstruction) + from pytools import ImmutableRecord # go through linearization_items_ordered and generate pairwise sub-schedule @@ -210,16 +160,16 @@ def generate_pairwise_schedule( if lp_insn_id == before_insn_id: # add before sched item - stmt_instance_set_before = StatementInstanceSet( - lp_insn_id, - next_insn_lex_tuple[:]) + stmt_instance_set_before = ImmutableRecord( + insn_id=lp_insn_id, + lex_points=next_insn_lex_tuple[:]) stmt_added = True if lp_insn_id == after_insn_id: # add after sched item - stmt_instance_set_after = StatementInstanceSet( - lp_insn_id, - next_insn_lex_tuple[:]) + stmt_instance_set_after = ImmutableRecord( + insn_id=lp_insn_id, + lex_points=next_insn_lex_tuple[:]) stmt_added = True # Note: before/after may refer to same stmt, in which case @@ -248,9 +198,9 @@ def generate_pairwise_schedule( # be zero, so add them. def _pad_lex_tuple_with_zeros(stmt_inst, length): - return StatementInstanceSet( - stmt_inst.insn_id, - stmt_inst.lex_points[:] + [0]*(length-len(stmt_inst.lex_points)), + return ImmutableRecord( + insn_id=stmt_inst.insn_id, + lex_points=stmt_inst.lex_points[:] + [0]*(length-len(stmt_inst.lex_points)) ) stmt_instance_set_before = _pad_lex_tuple_with_zeros( From 87933ce1f4a8b90c578b250a8d7c666af9479505 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Tue, 14 Jul 2020 11:09:43 -0500 Subject: [PATCH 111/460] fix flake8 error --- loopy/schedule/checker/schedule.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/loopy/schedule/checker/schedule.py b/loopy/schedule/checker/schedule.py index fc4938c98..6b047ed26 100644 --- a/loopy/schedule/checker/schedule.py +++ b/loopy/schedule/checker/schedule.py @@ -200,7 +200,8 @@ def generate_pairwise_schedule( def _pad_lex_tuple_with_zeros(stmt_inst, length): return ImmutableRecord( insn_id=stmt_inst.insn_id, - lex_points=stmt_inst.lex_points[:] + 
[0]*(length-len(stmt_inst.lex_points)) + lex_points=stmt_inst.lex_points[:] + [0]*( + length-len(stmt_inst.lex_points)) ) stmt_instance_set_before = _pad_lex_tuple_with_zeros( From e07b1476c6e04e094901412cc4ecce09562bc23f Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Sun, 19 Jul 2020 14:26:38 -0500 Subject: [PATCH 112/460] assert knl is preprocessed (rather than performing the preprocessing) in get_schedule_for_statement_pair() --- loopy/schedule/checker/__init__.py | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/loopy/schedule/checker/__init__.py b/loopy/schedule/checker/__init__.py index a2963f689..ca1684ec4 100644 --- a/loopy/schedule/checker/__init__.py +++ b/loopy/schedule/checker/__init__.py @@ -36,7 +36,7 @@ def get_schedule_for_statement_pair( representing a pairwise schedule as two mappings from statement instances to lexicographic time. - :arg knl: A :class:`loopy.kernel.LoopKernel` containing the + :arg knl: A preprocessed :class:`loopy.kernel.LoopKernel` containing the linearization items that will be used to create a schedule. 
:arg linearization_items: A list of :class:`loopy.schedule.ScheduleItem` @@ -106,10 +106,12 @@ def get_schedule_for_statement_pair( """ - # {{{ preprocess if not already preprocessed + # {{{ make sure kernel has been preprocessed - from loopy import preprocess_kernel - preproc_knl = preprocess_kernel(knl) + from loopy.kernel import KernelState + assert knl.state in [ + KernelState.PREPROCESSED, + KernelState.LINEARIZED] # }}} @@ -122,15 +124,15 @@ def get_schedule_for_statement_pair( get_concurrent_inames, get_EnterLoop_inames, ) - conc_inames, _ = get_concurrent_inames(preproc_knl) - enterloop_inames = get_EnterLoop_inames(linearization_items, preproc_knl) + conc_inames, _ = get_concurrent_inames(knl) + enterloop_inames = get_EnterLoop_inames(linearization_items, knl) conc_loop_inames = conc_inames & enterloop_inames if conc_loop_inames: from warnings import warn warn( "get_schedule_for_statement_pair encountered EnterLoop for inames %s " "with ConcurrentTag(s) in linearization for kernel %s. " - "Ignoring these loops." % (conc_loop_inames, preproc_knl.name)) + "Ignoring these loops." 
% (conc_loop_inames, knl.name)) # }}} @@ -139,7 +141,7 @@ def get_schedule_for_statement_pair( # include only instructions involved in this dependency from loopy.schedule.checker.schedule import generate_pairwise_schedule return generate_pairwise_schedule( - preproc_knl, + knl, linearization_items, insn_id_before, insn_id_after, From 17ed282c9955e399b3363993abd675ca571742d5 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Sun, 19 Jul 2020 14:34:34 -0500 Subject: [PATCH 113/460] make lex_points a tuple instead of a list --- loopy/schedule/checker/schedule.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/loopy/schedule/checker/schedule.py b/loopy/schedule/checker/schedule.py index 6b047ed26..8ee2e5106 100644 --- a/loopy/schedule/checker/schedule.py +++ b/loopy/schedule/checker/schedule.py @@ -162,14 +162,14 @@ def generate_pairwise_schedule( # add before sched item stmt_instance_set_before = ImmutableRecord( insn_id=lp_insn_id, - lex_points=next_insn_lex_tuple[:]) + lex_points=tuple(next_insn_lex_tuple[:])) stmt_added = True if lp_insn_id == after_insn_id: # add after sched item stmt_instance_set_after = ImmutableRecord( insn_id=lp_insn_id, - lex_points=next_insn_lex_tuple[:]) + lex_points=tuple(next_insn_lex_tuple[:])) stmt_added = True # Note: before/after may refer to same stmt, in which case @@ -200,8 +200,8 @@ def generate_pairwise_schedule( def _pad_lex_tuple_with_zeros(stmt_inst, length): return ImmutableRecord( insn_id=stmt_inst.insn_id, - lex_points=stmt_inst.lex_points[:] + [0]*( - length-len(stmt_inst.lex_points)) + lex_points=stmt_inst.lex_points[:] + tuple( + [0]*(length-len(stmt_inst.lex_points))) ) stmt_instance_set_before = _pad_lex_tuple_with_zeros( From 41e8acbbcbf265e3b28d18fdd6a5228d71f569e5 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Sun, 19 Jul 2020 14:37:52 -0500 Subject: [PATCH 114/460] renamed linearization_items_ordered->linearization_items in generate_pairwise_schedule() --- loopy/schedule/checker/schedule.py | 10 
+++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/loopy/schedule/checker/schedule.py b/loopy/schedule/checker/schedule.py index 8ee2e5106..838c5f74b 100644 --- a/loopy/schedule/checker/schedule.py +++ b/loopy/schedule/checker/schedule.py @@ -49,7 +49,7 @@ def generate_pairwise_schedule( knl, - linearization_items_ordered, + linearization_items, before_insn_id, after_insn_id, loops_to_ignore=set(), @@ -66,8 +66,8 @@ def generate_pairwise_schedule( kernel will be used to get the domains associated with the inames used in the statements. - :arg linearization_items_ordered: A list of :class:`loopy.schedule.ScheduleItem` - (to be renamed to `loopy.schedule.LinearizationItem`) containing the + :arg linearization_items: A list of :class:`loopy.schedule.ScheduleItem` + (to be renamed to `loopy.schedule.LinearizationItem`) including the two linearization items whose relative order will be described by the schedule. This list may be a *partial* linearization for a kernel since this function may be used during the linearization process. 
@@ -94,14 +94,14 @@ def generate_pairwise_schedule( from loopy.schedule import (EnterLoop, LeaveLoop, Barrier, RunInstruction) from pytools import ImmutableRecord - # go through linearization_items_ordered and generate pairwise sub-schedule + # go through linearization_items and generate pairwise sub-schedule # keep track of the next tuple of points in our lexicographic # ordering, initially this as a 1-d point with value 0 next_insn_lex_tuple = [0] stmt_added_since_prev_block_at_tier = [False] max_lex_dim = 0 - for linearization_item in linearization_items_ordered: + for linearization_item in linearization_items: if isinstance(linearization_item, EnterLoop): iname = linearization_item.iname if iname in loops_to_ignore: From bb5128d61f15cfa8fdee667e5f93c02d6bf88c00 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Sun, 19 Jul 2020 14:49:39 -0500 Subject: [PATCH 115/460] when creating schedule, if lp_insn_id is None, assert that the linearization item is a barrier; update comment explaining this scenario; add fixme about potential future work --- loopy/schedule/checker/schedule.py | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/loopy/schedule/checker/schedule.py b/loopy/schedule/checker/schedule.py index 838c5f74b..c71381761 100644 --- a/loopy/schedule/checker/schedule.py +++ b/loopy/schedule/checker/schedule.py @@ -148,11 +148,17 @@ def generate_pairwise_schedule( get_insn_id_from_linearization_item, ) lp_insn_id = get_insn_id_from_linearization_item(linearization_item) + if lp_insn_id is None: - # TODO make sure it's okay to ignore barriers without id - # (because they'll never be part of a dependency?) - # matmul example has barrier that fails this assertion... - # assert linearization_item.originating_insn_id is not None + assert isinstance(linearization_item, Barrier) + + # Barriers without insn ids were inserted as a result of a + # dependency. They don't themselves have dependencies. Ignore them. 
+ + # FIXME: It's possible that we could record metadata about them + # (e.g. what dependency produced them) and verify that they're + # adequately protecting all statement instance pairs. + continue # only process before/after insns, otherwise ignore From 3550e6d35dcfe5baf9c759024170fcf887b73e60 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Sun, 19 Jul 2020 16:49:00 -0500 Subject: [PATCH 116/460] fix indentation on function docstring --- loopy/schedule/checker/utils.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/loopy/schedule/checker/utils.py b/loopy/schedule/checker/utils.py index 0bfdf0a54..abd06685a 100644 --- a/loopy/schedule/checker/utils.py +++ b/loopy/schedule/checker/utils.py @@ -153,8 +153,8 @@ def create_symbolic_map_from_tuples( space, ): """Return an :class:`islpy.Map` constructed using the provided space, - mapping input->output tuples provided in `tuple_pairs_with_domains`, - with each set of tuple variables constrained by the domains provided. + mapping input->output tuples provided in `tuple_pairs_with_domains`, + with each set of tuple variables constrained by the domains provided. :arg tuple_pairs_with_domains: A :class:`list` with each element being a tuple of the form `((tup_in, tup_out), domain)`. From 38d584a9409cbb3486f0e05cf94258a2455c7ad2 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Sun, 19 Jul 2020 16:55:45 -0500 Subject: [PATCH 117/460] fix pluralizing class in docstring --- loopy/schedule/checker/schedule.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/loopy/schedule/checker/schedule.py b/loopy/schedule/checker/schedule.py index c71381761..5cbe4c463 100644 --- a/loopy/schedule/checker/schedule.py +++ b/loopy/schedule/checker/schedule.py @@ -78,7 +78,7 @@ def generate_pairwise_schedule( :arg after_insn_id: A :class:`str` instruction id specifying stmt_instance_set_after in this pair of instructions. 
- :returns: A two-tuple containing two :class:`islpy.Map`s + :returns: A two-tuple containing two :class:`islpy.Map`\ s representing the a pairwise schedule as two mappings from statement instances to lexicographic time, one for each of the two statements. From 18e468134f3b6e8fc36838d178207933deb53115 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Mon, 20 Jul 2020 14:21:59 -0500 Subject: [PATCH 118/460] assert that ignored linearization items are of specific type --- loopy/schedule/checker/schedule.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/loopy/schedule/checker/schedule.py b/loopy/schedule/checker/schedule.py index 5cbe4c463..6d4382c11 100644 --- a/loopy/schedule/checker/schedule.py +++ b/loopy/schedule/checker/schedule.py @@ -194,6 +194,10 @@ def generate_pairwise_schedule( stmt_added_since_prev_block_at_tier = [True]*len( stmt_added_since_prev_block_at_tier) else: + from loopy.schedule import (CallKernel, ReturnFromKernel) + # no action needed for these types of linearization item + assert isinstance( + linearization_item, (CallKernel, ReturnFromKernel)) pass # to save time, stop when we've created both statements if stmt_instance_set_before and stmt_instance_set_after: From 1f4edf2e678e257f30ec32d8ffc65b9b1a7e0fed Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Mon, 20 Jul 2020 14:46:16 -0500 Subject: [PATCH 119/460] use set().union(*[list comprehension]) instead of loop in list_var_names_in_isl_sets() --- loopy/schedule/checker/utils.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/loopy/schedule/checker/utils.py b/loopy/schedule/checker/utils.py index abd06685a..6c6cf160d 100644 --- a/loopy/schedule/checker/utils.py +++ b/loopy/schedule/checker/utils.py @@ -140,12 +140,11 @@ def ensure_dim_names_match_and_align(obj_map, tgt_map): def list_var_names_in_isl_sets( isl_sets, set_dim=isl.dim_type.set): - inames = set() - for isl_set in isl_sets: - inames.update(isl_set.get_var_names(set_dim)) + + inames = 
set().union(*[isl_set.get_var_names(set_dim) for isl_set in isl_sets]) # sorting is not necessary, but keeps results consistent between runs - return sorted(list(inames)) + return sorted(inames) def create_symbolic_map_from_tuples( From 7635f47bfafadcd9936a7be8b728b71ed31e75d0 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Mon, 20 Jul 2020 14:55:19 -0500 Subject: [PATCH 120/460] renamed list_var_names_in_isl_sets()->sorted_union_of_names_in_isl_sets() --- loopy/schedule/checker/schedule.py | 4 ++-- loopy/schedule/checker/utils.py | 5 ++++- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/loopy/schedule/checker/schedule.py b/loopy/schedule/checker/schedule.py index 6d4382c11..b56040adf 100644 --- a/loopy/schedule/checker/schedule.py +++ b/loopy/schedule/checker/schedule.py @@ -222,7 +222,7 @@ def _pad_lex_tuple_with_zeros(stmt_inst, length): # Now generate maps from the blueprint --------------------------------------- from loopy.schedule.checker.utils import ( - list_var_names_in_isl_sets, + sorted_union_of_names_in_isl_sets, create_symbolic_map_from_tuples, add_dims_to_isl_set, ) @@ -239,7 +239,7 @@ def _get_map_for_stmt_inst(stmt_inst, int_sid): # create space (an isl space in current implementation) # {('statement', used in statement domain>) -> # (lexicographic ordering dims)} - dom_inames_ordered = list_var_names_in_isl_sets([dom]) + dom_inames_ordered = sorted_union_of_names_in_isl_sets([dom]) in_names_sched = [STATEMENT_VAR_NAME] + dom_inames_ordered[:] sched_space = isl.Space.create_from_names( diff --git a/loopy/schedule/checker/utils.py b/loopy/schedule/checker/utils.py index 6c6cf160d..b845edacc 100644 --- a/loopy/schedule/checker/utils.py +++ b/loopy/schedule/checker/utils.py @@ -137,9 +137,12 @@ def ensure_dim_names_match_and_align(obj_map, tgt_map): return aligned_obj_map -def list_var_names_in_isl_sets( +def sorted_union_of_names_in_isl_sets( isl_sets, set_dim=isl.dim_type.set): + """Return a sorted list of the union of all variable names 
found in + the provided :class:`islpy.Set`\ s. + """ inames = set().union(*[isl_set.get_var_names(set_dim) for isl_set in isl_sets]) From 227762f5a6e345843584495310c138fac3c684a4 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Mon, 20 Jul 2020 15:04:54 -0500 Subject: [PATCH 121/460] rename constraint->condition in create_symbolic_map_from_tuples() --- loopy/schedule/checker/utils.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/loopy/schedule/checker/utils.py b/loopy/schedule/checker/utils.py index b845edacc..92219b875 100644 --- a/loopy/schedule/checker/utils.py +++ b/loopy/schedule/checker/utils.py @@ -172,7 +172,7 @@ def create_symbolic_map_from_tuples( `tuple_pairs_with_domains`, map `(tup_in)->(tup_out) : domain`, where `tup_in` and `tup_out` are numeric or symbolic values assigned to the input and output - dimension variables in `space`, and `domain` specifies constraints + dimension variables in `space`, and `domain` specifies conditions on these values. 
""" @@ -194,31 +194,31 @@ def create_symbolic_map_from_tuples( dim_type.out, 0, dim_type.in_, len(space_in_names), len(space_out_names)) for (tup_in, tup_out), dom in tuple_pairs_with_domains: - # initialize constraint with true - constraint = islvars[0].eq_set(islvars[0]) + # initialize condition with true + condition = islvars[0].eq_set(islvars[0]) # set values for 'in' dimension using tuple vals assert len(tup_in) == len(space_in_names) for dim_name, val_in in zip(space_in_names, tup_in): if isinstance(val_in, int): - constraint = constraint \ + condition = condition \ & islvars[dim_name].eq_set(islvars[0]+val_in) else: - constraint = constraint \ + condition = condition \ & islvars[dim_name].eq_set(islvars[val_in]) # set values for 'out' dimension using tuple vals assert len(tup_out) == len(space_out_names) for dim_name, val_out in zip(space_out_names, tup_out): if isinstance(val_out, int): - constraint = constraint \ + condition = condition \ & islvars[dim_name].eq_set(islvars[0]+val_out) else: - constraint = constraint \ + condition = condition \ & islvars[dim_name].eq_set(islvars[val_out]) # convert set to map by moving dimensions around - map_from_set = isl.Map.from_domain(constraint) + map_from_set = isl.Map.from_domain(condition) map_from_set = map_from_set.move_dims( dim_type.out, 0, dim_type.in_, len(space_in_names), len(space_out_names)) From 6d336bae34cc43ed628d9fa67f56161a4d7dbb20 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Mon, 20 Jul 2020 15:48:56 -0500 Subject: [PATCH 122/460] make function for duplicated code: _conjunction_of_dim_eq_conditions() --- loopy/schedule/checker/utils.py | 34 +++++++++++++++------------------ 1 file changed, 15 insertions(+), 19 deletions(-) diff --git a/loopy/schedule/checker/utils.py b/loopy/schedule/checker/utils.py index 92219b875..2cf8e90fb 100644 --- a/loopy/schedule/checker/utils.py +++ b/loopy/schedule/checker/utils.py @@ -187,6 +187,17 @@ def create_symbolic_map_from_tuples( # loop through pairs and create a 
set that will later be converted to a map + def _conjunction_of_dim_eq_conditions(dim_names, values, islvars): + condition = islvars[0].eq_set(islvars[0]) + for dim_name, val in zip(dim_names, values): + if isinstance(val, int): + condition = condition \ + & islvars[dim_name].eq_set(islvars[0]+val) + else: + condition = condition \ + & islvars[dim_name].eq_set(islvars[val]) + return condition + # initialize union to empty union_of_maps = isl.Map.from_domain( islvars[0].eq_set(islvars[0]+1) # 0 == 1 (false) @@ -194,28 +205,13 @@ def create_symbolic_map_from_tuples( dim_type.out, 0, dim_type.in_, len(space_in_names), len(space_out_names)) for (tup_in, tup_out), dom in tuple_pairs_with_domains: - # initialize condition with true - condition = islvars[0].eq_set(islvars[0]) - # set values for 'in' dimension using tuple vals - assert len(tup_in) == len(space_in_names) - for dim_name, val_in in zip(space_in_names, tup_in): - if isinstance(val_in, int): - condition = condition \ - & islvars[dim_name].eq_set(islvars[0]+val_in) - else: - condition = condition \ - & islvars[dim_name].eq_set(islvars[val_in]) + condition = _conjunction_of_dim_eq_conditions( + space_in_names, tup_in, islvars) # set values for 'out' dimension using tuple vals - assert len(tup_out) == len(space_out_names) - for dim_name, val_out in zip(space_out_names, tup_out): - if isinstance(val_out, int): - condition = condition \ - & islvars[dim_name].eq_set(islvars[0]+val_out) - else: - condition = condition \ - & islvars[dim_name].eq_set(islvars[val_out]) + condition = condition & _conjunction_of_dim_eq_conditions( + space_out_names, tup_out, islvars) # convert set to map by moving dimensions around map_from_set = isl.Map.from_domain(condition) From a81bd480459cbddb9b9c025e60407ca5cbaf3192 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Mon, 20 Jul 2020 15:56:36 -0500 Subject: [PATCH 123/460] rename get_concurrent_inames(knl)->partition_inames_by_concurrency(knl) --- loopy/schedule/checker/__init__.py | 4 
++-- loopy/schedule/checker/utils.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/loopy/schedule/checker/__init__.py b/loopy/schedule/checker/__init__.py index ca1684ec4..466837534 100644 --- a/loopy/schedule/checker/__init__.py +++ b/loopy/schedule/checker/__init__.py @@ -121,10 +121,10 @@ def get_schedule_for_statement_pair( # won't be any inames with ConcurrentTags in EnterLoop linearization items. # Test which exercises this: test_linearization_checker_with_stroud_bernstein()) from loopy.schedule.checker.utils import ( - get_concurrent_inames, + partition_inames_by_concurrency, get_EnterLoop_inames, ) - conc_inames, _ = get_concurrent_inames(knl) + conc_inames, _ = partition_inames_by_concurrency(knl) enterloop_inames = get_EnterLoop_inames(linearization_items, knl) conc_loop_inames = conc_inames & enterloop_inames if conc_loop_inames: diff --git a/loopy/schedule/checker/utils.py b/loopy/schedule/checker/utils.py index 2cf8e90fb..bad083558 100644 --- a/loopy/schedule/checker/utils.py +++ b/loopy/schedule/checker/utils.py @@ -237,7 +237,7 @@ def _conjunction_of_dim_eq_conditions(dim_names, values, islvars): return union_of_maps -def get_concurrent_inames(knl): +def partition_inames_by_concurrency(knl): from loopy.kernel.data import ConcurrentTag conc_inames = set() non_conc_inames = set() From be1cf81556b359c5c63fe21d6f2909198b83732d Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Tue, 21 Jul 2020 11:14:32 -0500 Subject: [PATCH 124/460] to prevent quadratic complexity in schedule creation, create lex points for all relevant insn_ids simultaneously; then afterward process pairs individually; (also eliminate more unnecessary lex dims) --- loopy/schedule/checker/__init__.py | 14 ++- loopy/schedule/checker/schedule.py | 138 +++++++++++++++-------------- test/test_linearization_checker.py | 68 +++++++------- 3 files changed, 107 insertions(+), 113 deletions(-) diff --git a/loopy/schedule/checker/__init__.py b/loopy/schedule/checker/__init__.py 
index 466837534..fa9700b47 100644 --- a/loopy/schedule/checker/__init__.py +++ b/loopy/schedule/checker/__init__.py @@ -21,13 +21,12 @@ """ -# {{{ create a pairwise schedule for statement pair +# {{{ create a pairwise schedules for statement pairs -def get_schedule_for_statement_pair( +def get_schedules_for_statement_pairs( knl, linearization_items, - insn_id_before, - insn_id_after, + insn_id_pairs, ): r"""Given a pair of statements in a linearized kernel, determine the (relative) order in which the instances are executed, @@ -139,12 +138,11 @@ def get_schedule_for_statement_pair( # {{{ Create two mappings from {statement instance: lex point} # include only instructions involved in this dependency - from loopy.schedule.checker.schedule import generate_pairwise_schedule - return generate_pairwise_schedule( + from loopy.schedule.checker.schedule import generate_pairwise_schedules + return generate_pairwise_schedules( knl, linearization_items, - insn_id_before, - insn_id_after, + insn_id_pairs, loops_to_ignore=conc_loop_inames, ) diff --git a/loopy/schedule/checker/schedule.py b/loopy/schedule/checker/schedule.py index b56040adf..6b9905613 100644 --- a/loopy/schedule/checker/schedule.py +++ b/loopy/schedule/checker/schedule.py @@ -47,11 +47,10 @@ STATEMENT_VAR_NAME = "%sstatement" % (LIN_CHECK_IDENTIFIER_PREFIX) -def generate_pairwise_schedule( +def generate_pairwise_schedules( knl, linearization_items, - before_insn_id, - after_insn_id, + insn_id_pairs, loops_to_ignore=set(), ): r"""Given a pair of statements in a linearized kernel, determine @@ -79,17 +78,21 @@ def generate_pairwise_schedule( stmt_instance_set_after in this pair of instructions. :returns: A two-tuple containing two :class:`islpy.Map`\ s - representing the a pairwise schedule as two mappings + representing a pairwise schedule as two mappings from statement instances to lexicographic time, one for each of the two statements. 
""" + # TODO + # update documentation + + all_insn_ids = set().union(*insn_id_pairs) + # For each statement, create a :class:`ImmutableRecord` describing the set of # statement instances. Contains the insn_id and a list representing points # in the lexicographic ordering containing items of :class:`int` or # :class:`str` :mod:`loopy` inames. - stmt_instance_set_before = None - stmt_instance_set_after = None + stmt_instances = {} from loopy.schedule import (EnterLoop, LeaveLoop, Barrier, RunInstruction) from pytools import ImmutableRecord @@ -161,31 +164,10 @@ def generate_pairwise_schedule( continue - # only process before/after insns, otherwise ignore - stmt_added = False - - if lp_insn_id == before_insn_id: - # add before sched item - stmt_instance_set_before = ImmutableRecord( - insn_id=lp_insn_id, - lex_points=tuple(next_insn_lex_tuple[:])) - stmt_added = True - - if lp_insn_id == after_insn_id: - # add after sched item - stmt_instance_set_after = ImmutableRecord( - insn_id=lp_insn_id, - lex_points=tuple(next_insn_lex_tuple[:])) - stmt_added = True - - # Note: before/after may refer to same stmt, in which case - # both of the above conditionals execute - - if stmt_added: - - # track the max number of lex dims used - if len(next_insn_lex_tuple) > max_lex_dim: - max_lex_dim = len(next_insn_lex_tuple) + # only process listed insns, otherwise ignore + if lp_insn_id in all_insn_ids: + # add item + stmt_instances[lp_insn_id] = tuple(next_insn_lex_tuple[:]) # increment lex dim val enumerating items in current code block next_insn_lex_tuple[-1] = next_insn_lex_tuple[-1] + 1 @@ -199,27 +181,10 @@ def generate_pairwise_schedule( assert isinstance( linearization_item, (CallKernel, ReturnFromKernel)) pass - # to save time, stop when we've created both statements - if stmt_instance_set_before and stmt_instance_set_after: - break - - # At this point, pairwise sub-schedule may contain lex point tuples - # missing dimensions; the values in these missing dims should - # be 
zero, so add them. - - def _pad_lex_tuple_with_zeros(stmt_inst, length): - return ImmutableRecord( - insn_id=stmt_inst.insn_id, - lex_points=stmt_inst.lex_points[:] + tuple( - [0]*(length-len(stmt_inst.lex_points))) - ) - stmt_instance_set_before = _pad_lex_tuple_with_zeros( - stmt_instance_set_before, max_lex_dim) - stmt_instance_set_after = _pad_lex_tuple_with_zeros( - stmt_instance_set_after, max_lex_dim) - - # Now generate maps from the blueprint --------------------------------------- + # to save time, stop when we've created all statements + if len(stmt_instances.keys()) == all_insn_ids: + break from loopy.schedule.checker.utils import ( sorted_union_of_names_in_isl_sets, @@ -227,14 +192,28 @@ def _pad_lex_tuple_with_zeros(stmt_inst, length): add_dims_to_isl_set, ) - params_sched = [] - out_names_sched = [LEX_VAR_PREFIX+str(i) for i in range(max_lex_dim)] - - def _get_map_for_stmt_inst(stmt_inst, int_sid): + def _pad_tuple_with_zeros(tup, length): + return tup[:] + tuple([0]*(length-len(tup))) + + def _remove_matching_integer_dims(tup0, tup1): + new_tup0 = [] + new_tup1 = [] + for d0, d1 in zip(tup0, tup1): + if not ( + isinstance(d0, int) and + isinstance(d1, int) and + d0 == d1): + # keep this dim + new_tup0.append(d0) + new_tup1.append(d1) + # TODO? 
also change all ints to 0 or 1 + return tuple(new_tup0), tuple(new_tup1) + + def _get_map_for_stmt_inst(insn_id, lex_points, int_sid, out_names_sched): # Get inames domain for statement instance (a BasicSet) dom = knl.get_inames_domain( - knl.id_to_insn[stmt_inst.insn_id].within_inames) + knl.id_to_insn[insn_id].within_inames) # create space (an isl space in current implementation) # {('statement', used in statement domain>) -> @@ -244,7 +223,7 @@ def _get_map_for_stmt_inst(stmt_inst, int_sid): in_names_sched = [STATEMENT_VAR_NAME] + dom_inames_ordered[:] sched_space = isl.Space.create_from_names( isl.DEFAULT_CONTEXT, - in_=in_names_sched, out=out_names_sched, params=params_sched) + in_=in_names_sched, out=out_names_sched, params=[]) # Insert 'statement' dim into domain so that its space allows # for intersection with sched map later @@ -258,7 +237,7 @@ def _get_map_for_stmt_inst(stmt_inst, int_sid): # Add all inames from domains to each map domain tuple. tuple_pair = [( (int_sid, ) + tuple(dom_inames_ordered), - stmt_inst.lex_points + lex_points )] # create map @@ -267,15 +246,38 @@ def _get_map_for_stmt_inst(stmt_inst, int_sid): space=sched_space, ) - # Determine integer IDs that will represent each statement in mapping - # (dependency map creation assumes sid_before=0 and sid_after=1, unless - # before and after refer to same stmt, in which case sid_before=sid_after=0) - int_sid_before = 0 - int_sid_after = 0 if ( - stmt_instance_set_before.insn_id == stmt_instance_set_after.insn_id - ) else 1 + pairwise_schedules = [] + for insn_id_before, insn_id_after in insn_id_pairs: + lex_tup_before = stmt_instances[insn_id_before] + lex_tup_after = stmt_instances[insn_id_after] + + # simplify tuples to the extent possible ------------------------------------- + + # At this point, pairwise sub-schedule may contain lex point tuples + # missing dimensions; the values in these missing dims should + # be zero, so add them. 
+ max_lex_dims = max(len(lex_tup_before), len(lex_tup_after)) + lex_tup_before = _pad_tuple_with_zeros(lex_tup_before, max_lex_dims) + lex_tup_after = _pad_tuple_with_zeros(lex_tup_after, max_lex_dims) + + lex_tup_before, lex_tup_after = _remove_matching_integer_dims( + lex_tup_before, lex_tup_after) + + # Now generate maps from the blueprint --------------------------------------- + + out_names_sched = [LEX_VAR_PREFIX+str(i) for i in range(len(lex_tup_before))] + + # Determine integer IDs that will represent each statement in mapping + # (dependency map creation assumes sid_before=0 and sid_after=1, unless + # before and after refer to same stmt, in which case sid_before=sid_after=0) + int_sid_before = 0 + int_sid_after = 0 if insn_id_before == insn_id_after else 1 + + map_before = _get_map_for_stmt_inst( + insn_id_before, lex_tup_before, int_sid_before, out_names_sched) + map_after = _get_map_for_stmt_inst( + insn_id_after, lex_tup_after, int_sid_after, out_names_sched) - map_before = _get_map_for_stmt_inst(stmt_instance_set_before, int_sid_before) - map_after = _get_map_for_stmt_inst(stmt_instance_set_after, int_sid_after) + pairwise_schedules.append((map_before, map_after)) - return (map_before, map_after) + return pairwise_schedules diff --git a/test/test_linearization_checker.py b/test/test_linearization_checker.py index 60abfade0..716737bdd 100644 --- a/test/test_linearization_checker.py +++ b/test/test_linearization_checker.py @@ -53,7 +53,7 @@ def test_lexschedule_creation(): import islpy as isl from loopy.schedule.checker import ( - get_schedule_for_statement_pair, + get_schedules_for_statement_pairs, ) from loopy.schedule.checker.utils import ( ensure_dim_names_match_and_align, @@ -115,12 +115,11 @@ def _lex_space_string(dim_vals): # Relationship between insn_a and insn_b --------------------------------------- # Get two maps - sched_map_before, sched_map_after = get_schedule_for_statement_pair( + sched_map_before, sched_map_after = 
get_schedules_for_statement_pairs( knl, linearization_items, - "insn_a", - "insn_b", - ) + [("insn_a", "insn_b")], + )[0] # Create expected maps, align, compare @@ -128,7 +127,7 @@ def _lex_space_string(dim_vals): "[pi, pk] -> { [%s=0, i, k] -> [%s] : 0 <= i < pi and 0 <= k < pk }" % ( STATEMENT_VAR_NAME, - _lex_space_string(["0", "i", "0", "k", "0"]), + _lex_space_string(["i", "0", "k"]), ) ) sched_map_before_expected = ensure_dim_names_match_and_align( @@ -138,7 +137,7 @@ def _lex_space_string(dim_vals): "[pi, pj] -> { [%s=1, i, j] -> [%s] : 0 <= i < pi and 0 <= j < pj }" % ( STATEMENT_VAR_NAME, - _lex_space_string(["0", "i", "1", "j", "0"]), + _lex_space_string(["i", "1", "j"]), ) ) sched_map_after_expected = ensure_dim_names_match_and_align( @@ -151,12 +150,11 @@ def _lex_space_string(dim_vals): # Relationship between insn_a and insn_c --------------------------------------- # Get two maps - sched_map_before, sched_map_after = get_schedule_for_statement_pair( + sched_map_before, sched_map_after = get_schedules_for_statement_pairs( knl, linearization_items, - "insn_a", - "insn_c", - ) + [("insn_a", "insn_c")], + )[0] # Create expected maps, align, compare @@ -164,7 +162,7 @@ def _lex_space_string(dim_vals): "[pi, pk] -> { [%s=0, i, k] -> [%s] : 0 <= i < pi and 0 <= k < pk }" % ( STATEMENT_VAR_NAME, - _lex_space_string(["0", "i", "0", "k", "0"]), + _lex_space_string(["i", "0", "k"]), ) ) sched_map_before_expected = ensure_dim_names_match_and_align( @@ -174,7 +172,7 @@ def _lex_space_string(dim_vals): "[pi, pj] -> { [%s=1, i, j] -> [%s] : 0 <= i < pi and 0 <= j < pj }" % ( STATEMENT_VAR_NAME, - _lex_space_string(["0", "i", "1", "j", "0"]), + _lex_space_string(["i", "1", "j"]), ) ) sched_map_after_expected = ensure_dim_names_match_and_align( @@ -190,12 +188,11 @@ def _lex_space_string(dim_vals): # (i loop could be before or after t loop) def perform_insn_ad_checks_with(a_lex_idx, d_lex_idx): # Get two maps - sched_map_before, sched_map_after = 
get_schedule_for_statement_pair( + sched_map_before, sched_map_after = get_schedules_for_statement_pairs( knl, linearization_items, - "insn_a", - "insn_d", - ) + [("insn_a", "insn_d")], + )[0] # Create expected maps, align, compare @@ -203,7 +200,7 @@ def perform_insn_ad_checks_with(a_lex_idx, d_lex_idx): "[pi, pk] -> { [%s=0, i, k] -> [%s] : 0 <= i < pi and 0 <= k < pk }" % ( STATEMENT_VAR_NAME, - _lex_space_string([a_lex_idx, "i", "0", "k", "0"]), + _lex_space_string([a_lex_idx, "i", "k"]), ) ) sched_map_before_expected = ensure_dim_names_match_and_align( @@ -213,7 +210,7 @@ def perform_insn_ad_checks_with(a_lex_idx, d_lex_idx): "[pt] -> { [%s=1, t] -> [%s] : 0 <= t < pt }" % ( STATEMENT_VAR_NAME, - _lex_space_string([d_lex_idx, "t", "0", "0", "0"]), + _lex_space_string([d_lex_idx, "t", "0"]), ) ) sched_map_after_expected = ensure_dim_names_match_and_align( @@ -236,12 +233,11 @@ def perform_insn_ad_checks_with(a_lex_idx, d_lex_idx): # (i loop could be before or after t loop) def perform_insn_bc_checks_with(b_lex_idx, c_lex_idx): # Get two maps - sched_map_before, sched_map_after = get_schedule_for_statement_pair( + sched_map_before, sched_map_after = get_schedules_for_statement_pairs( knl, linearization_items, - "insn_b", - "insn_c", - ) + [("insn_b", "insn_c")], + )[0] # Create expected maps, align, compare @@ -249,7 +245,7 @@ def perform_insn_bc_checks_with(b_lex_idx, c_lex_idx): "[pi, pj] -> { [%s=0, i, j] -> [%s] : 0 <= i < pi and 0 <= j < pj }" % ( STATEMENT_VAR_NAME, - _lex_space_string(["0", "i", "0", "j", b_lex_idx]), + _lex_space_string(["i", "j", b_lex_idx]), ) ) sched_map_before_expected = ensure_dim_names_match_and_align( @@ -259,7 +255,7 @@ def perform_insn_bc_checks_with(b_lex_idx, c_lex_idx): "[pi, pj] -> { [%s=1, i, j] -> [%s] : 0 <= i < pi and 0 <= j < pj }" % ( STATEMENT_VAR_NAME, - _lex_space_string(["0", "i", "0", "j", c_lex_idx]), + _lex_space_string(["i", "j", c_lex_idx]), ) ) sched_map_after_expected = ensure_dim_names_match_and_align( @@ 
-282,12 +278,11 @@ def perform_insn_bc_checks_with(b_lex_idx, c_lex_idx): # (i loop could be before or after t loop) def perform_insn_bd_checks_with(b_lex_idx, d_lex_idx): # Get two maps - sched_map_before, sched_map_after = get_schedule_for_statement_pair( + sched_map_before, sched_map_after = get_schedules_for_statement_pairs( knl, linearization_items, - "insn_b", - "insn_d", - ) + [("insn_b", "insn_d")], + )[0] # Create expected maps, align, compare @@ -295,7 +290,7 @@ def perform_insn_bd_checks_with(b_lex_idx, d_lex_idx): "[pi, pj] -> { [%s=0, i, j] -> [%s] : 0 <= i < pi and 0 <= j < pj }" % ( STATEMENT_VAR_NAME, - _lex_space_string([b_lex_idx, "i", "0", "j", "0"]), + _lex_space_string([b_lex_idx, "i", "j"]), ) ) sched_map_before_expected = ensure_dim_names_match_and_align( @@ -305,7 +300,7 @@ def perform_insn_bd_checks_with(b_lex_idx, d_lex_idx): "[pt] -> { [%s=1, t] -> [%s] : 0 <= t < pt }" % ( STATEMENT_VAR_NAME, - _lex_space_string([d_lex_idx, "t", "0", "0", "0"]), + _lex_space_string([d_lex_idx, "t", "0"]), ) ) sched_map_after_expected = ensure_dim_names_match_and_align( @@ -328,12 +323,11 @@ def perform_insn_bd_checks_with(b_lex_idx, d_lex_idx): # (i loop could be before or after t loop) def perform_insn_cd_checks_with(c_lex_idx, d_lex_idx): # Get two maps - sched_map_before, sched_map_after = get_schedule_for_statement_pair( + sched_map_before, sched_map_after = get_schedules_for_statement_pairs( knl, linearization_items, - "insn_c", - "insn_d", - ) + [("insn_c", "insn_d")], + )[0] # Create expected maps, align, compare @@ -341,7 +335,7 @@ def perform_insn_cd_checks_with(c_lex_idx, d_lex_idx): "[pi, pj] -> { [%s=0, i, j] -> [%s] : 0 <= i < pi and 0 <= j < pj }" % ( STATEMENT_VAR_NAME, - _lex_space_string([c_lex_idx, "i", "0", "j", "0"]), + _lex_space_string([c_lex_idx, "i", "j"]), ) ) sched_map_before_expected = ensure_dim_names_match_and_align( @@ -351,7 +345,7 @@ def perform_insn_cd_checks_with(c_lex_idx, d_lex_idx): "[pt] -> { [%s=1, t] -> [%s] : 0 <= 
t < pt }" % ( STATEMENT_VAR_NAME, - _lex_space_string([d_lex_idx, "t", "0", "0", "0"]), + _lex_space_string([d_lex_idx, "t", "0"]), ) ) sched_map_after_expected = ensure_dim_names_match_and_align( From 4211f6e3c519072970830c796d46cef7a975ea17 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Fri, 24 Jul 2020 13:14:32 -0500 Subject: [PATCH 125/460] add todo --- loopy/schedule/checker/schedule.py | 1 + 1 file changed, 1 insertion(+) diff --git a/loopy/schedule/checker/schedule.py b/loopy/schedule/checker/schedule.py index 6b9905613..6ebf7848d 100644 --- a/loopy/schedule/checker/schedule.py +++ b/loopy/schedule/checker/schedule.py @@ -142,6 +142,7 @@ def generate_pairwise_schedules( # statements since last opened/closed loop), but if we have not # added any statements within the previous section yet, we # don't have to (effectively ignoring that section of code). + # TODO since we're getting rid of unnecessary dims later, maybe don't need this? stmt_added_since_prev_block_at_tier.pop() if stmt_added_since_prev_block_at_tier[-1]: next_insn_lex_tuple[-1] = next_insn_lex_tuple[-1]+1 From a4e790f28563f62479a6f66b93d5c41a38045f17 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Sun, 26 Jul 2020 18:33:30 -0500 Subject: [PATCH 126/460] while removing unnecessary dims in maps, also replace int-valued dims that are not in {0,1} with {0,1} (we're only describing relative order, so higher int values are unnecessary and confusing) --- loopy/schedule/checker/schedule.py | 47 +++++++++++++++++++++--------- 1 file changed, 33 insertions(+), 14 deletions(-) diff --git a/loopy/schedule/checker/schedule.py b/loopy/schedule/checker/schedule.py index 6ebf7848d..dff0162d6 100644 --- a/loopy/schedule/checker/schedule.py +++ b/loopy/schedule/checker/schedule.py @@ -88,14 +88,12 @@ def generate_pairwise_schedules( all_insn_ids = set().union(*insn_id_pairs) - # For each statement, create a :class:`ImmutableRecord` describing the set of - # statement instances. 
Contains the insn_id and a list representing points + # For each statement, map the insn_id to a tuple representing points # in the lexicographic ordering containing items of :class:`int` or # :class:`str` :mod:`loopy` inames. stmt_instances = {} from loopy.schedule import (EnterLoop, LeaveLoop, Barrier, RunInstruction) - from pytools import ImmutableRecord # go through linearization_items and generate pairwise sub-schedule @@ -103,7 +101,6 @@ def generate_pairwise_schedules( # ordering, initially this as a 1-d point with value 0 next_insn_lex_tuple = [0] stmt_added_since_prev_block_at_tier = [False] - max_lex_dim = 0 for linearization_item in linearization_items: if isinstance(linearization_item, EnterLoop): iname = linearization_item.iname @@ -196,18 +193,41 @@ def generate_pairwise_schedules( def _pad_tuple_with_zeros(tup, length): return tup[:] + tuple([0]*(length-len(tup))) - def _remove_matching_integer_dims(tup0, tup1): + def _simplify_dims(tup0, tup1): new_tup0 = [] new_tup1 = [] + # loop over dims for d0, d1 in zip(tup0, tup1): - if not ( - isinstance(d0, int) and - isinstance(d1, int) and - d0 == d1): + if isinstance(d0, int) and isinstance(d1, int): + # Both vals are ints for this dim + + # If the ints match, this dim doesn't provide info about the + # relative ordering of these two statements, + # so skip (remove) this dim. + + # Otherwise, the ints inform us about the relative ordering of + # two statements. While their values may be larger than 1 in + # the lexicographic ordering describing a larger set of + # statements, in a pairwise schedule, only ints 0 and 1 are + # necessary to specify relative order. To keep the pairwise + # schedules as simple and comprehensible as possible, use only + # integers 0 and 1 to specify relative orderings in integer lex + # dims. 
+ # (doesn't take much extra time since we are already going + # through these to remove unnecessary map dims) + + if d0 == d1: + continue + elif d0 > d1: + new_tup0.append(1) + new_tup1.append(0) + else: # d1 > d0 + new_tup0.append(0) + new_tup1.append(1) + else: # keep this dim new_tup0.append(d0) new_tup1.append(d1) - # TODO? also change all ints to 0 or 1 return tuple(new_tup0), tuple(new_tup1) def _get_map_for_stmt_inst(insn_id, lex_points, int_sid, out_names_sched): @@ -252,7 +272,7 @@ def _get_map_for_stmt_inst(insn_id, lex_points, int_sid, out_names_sched): lex_tup_before = stmt_instances[insn_id_before] lex_tup_after = stmt_instances[insn_id_after] - # simplify tuples to the extent possible ------------------------------------- + # simplify tuples to the extent possible ------------------------------------ # At this point, pairwise sub-schedule may contain lex point tuples # missing dimensions; the values in these missing dims should @@ -261,10 +281,9 @@ def _get_map_for_stmt_inst(insn_id, lex_points, int_sid, out_names_sched): lex_tup_before = _pad_tuple_with_zeros(lex_tup_before, max_lex_dims) lex_tup_after = _pad_tuple_with_zeros(lex_tup_after, max_lex_dims) - lex_tup_before, lex_tup_after = _remove_matching_integer_dims( - lex_tup_before, lex_tup_after) + lex_tup_before, lex_tup_after = _simplify_dims(lex_tup_before, lex_tup_after) - # Now generate maps from the blueprint --------------------------------------- + # Now generate maps from the blueprint -------------------------------------- out_names_sched = [LEX_VAR_PREFIX+str(i) for i in range(len(lex_tup_before))] From 80a89ea3a1e06ffe5c91158d6789f154cd93076b Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Sun, 26 Jul 2020 20:19:28 -0500 Subject: [PATCH 127/460] Remove logic avoiding gratuitous incrementing of integer lex dim values in schedule outline creation since these are replaced with {0, 1} in the map simplification step. 
Further reduce number of lex dims in pairwise maps with the following strategy: once a lex tuple dimension is found where both tuples have non-matching integer values, remove any faster-updating lex dimensions where both tuples have integer values, even if the integers don't match. --- loopy/schedule/checker/schedule.py | 86 +++++++++++++++--------------- 1 file changed, 42 insertions(+), 44 deletions(-) diff --git a/loopy/schedule/checker/schedule.py b/loopy/schedule/checker/schedule.py index dff0162d6..cd9443e67 100644 --- a/loopy/schedule/checker/schedule.py +++ b/loopy/schedule/checker/schedule.py @@ -100,33 +100,29 @@ def generate_pairwise_schedules( # keep track of the next tuple of points in our lexicographic # ordering, initially this as a 1-d point with value 0 next_insn_lex_tuple = [0] - stmt_added_since_prev_block_at_tier = [False] for linearization_item in linearization_items: if isinstance(linearization_item, EnterLoop): iname = linearization_item.iname if iname in loops_to_ignore: continue - # We could always increment next_insn_lex_tuple[-1] here since - # this new section of code comes after the previous section - # (statements since last opened/closed loop), but if we have - # not added any statements within the previous section yet, we - # don't have to (effectively ignoring that section of code). - if stmt_added_since_prev_block_at_tier[-1]: - next_insn_lex_tuple[-1] = next_insn_lex_tuple[-1]+1 - stmt_added_since_prev_block_at_tier[-1] = False + # Increment next_insn_lex_tuple[-1] for statements in the section + # of code after this EnterLoop. 
+ # (not technically necessary if no statement was added in the + # previous section; gratuitious incrementing is counteracted + # in the simplification step below) + next_insn_lex_tuple[-1] = next_insn_lex_tuple[-1]+1 # upon entering a loop, we enter a new (deeper) tier, # add one lex dimension for the loop variable, - # add second lex dim to enumerate code blocks within new loop, and - # append a dim to stmt_added_since_prev_block_at_tier to represent - # new tier + # add second lex dim to enumerate code blocks within new loop next_insn_lex_tuple.append(iname) next_insn_lex_tuple.append(0) - stmt_added_since_prev_block_at_tier.append(False) + elif isinstance(linearization_item, LeaveLoop): if linearization_item.iname in loops_to_ignore: continue + # upon leaving a loop, # pop lex dimension for enumerating code blocks within this loop, and # pop lex dimension for the loop variable, and @@ -134,16 +130,13 @@ def generate_pairwise_schedules( next_insn_lex_tuple.pop() next_insn_lex_tuple.pop() - # We could always increment next_insn_lex_tuple[-1] here since - # this new block of code comes after the previous block (all - # statements since last opened/closed loop), but if we have not - # added any statements within the previous section yet, we - # don't have to (effectively ignoring that section of code). - # TODO since we're getting rid of unnecessary dims later, maybe don't need this? - stmt_added_since_prev_block_at_tier.pop() - if stmt_added_since_prev_block_at_tier[-1]: - next_insn_lex_tuple[-1] = next_insn_lex_tuple[-1]+1 - stmt_added_since_prev_block_at_tier[-1] = False + # Increment next_insn_lex_tuple[-1] for statements in the section + # of code after this LeaveLoop. 
+ # (not technically necessary if no statement was added in the + # previous section; gratuitious incrementing is counteracted + # in the simplification step below) + next_insn_lex_tuple[-1] = next_insn_lex_tuple[-1]+1 + elif isinstance(linearization_item, (RunInstruction, Barrier)): from loopy.schedule.checker.utils import ( get_insn_id_from_linearization_item, @@ -170,9 +163,6 @@ def generate_pairwise_schedules( # increment lex dim val enumerating items in current code block next_insn_lex_tuple[-1] = next_insn_lex_tuple[-1] + 1 - # all current (nested) blocks now contain a statement - stmt_added_since_prev_block_at_tier = [True]*len( - stmt_added_since_prev_block_at_tier) else: from loopy.schedule import (CallKernel, ReturnFromKernel) # no action needed for these types of linearization item @@ -193,37 +183,44 @@ def generate_pairwise_schedules( def _pad_tuple_with_zeros(tup, length): return tup[:] + tuple([0]*(length-len(tup))) - def _simplify_dims(tup0, tup1): + def _simplify_lex_dims(tup0, tup1): + """Simplify pair of lex tuples in order to reduce the complexity of + resulting maps. Remove lex tuple dimensions with matching integer values + since these do not provide information on relative ordering. For the same + reason, once a dimension is found where both tuples have non-matching integer + values, remove any faster-updating lex dimensions where both tuples have + integer values, even if the integers don't match. + """ + # TODO actually, once we find non-matching integer dims, we don't + # need *any* more lex dims to specify relative ordering. + new_tup0 = [] new_tup1 = [] + non_matching_int_dims_found = False # loop over dims for d0, d1 in zip(tup0, tup1): if isinstance(d0, int) and isinstance(d1, int): # Both vals are ints for this dim - # If the ints match, this dim doesn't provide info about the - # relative ordering of these two statements, - # so skip (remove) this dim. 
- - # Otherwise, the ints inform us about the relative ordering of - # two statements. While their values may be larger than 1 in - # the lexicographic ordering describing a larger set of - # statements, in a pairwise schedule, only ints 0 and 1 are - # necessary to specify relative order. To keep the pairwise - # schedules as simple and comprehensible as possible, use only - # integers 0 and 1 to specify relative orderings in integer lex - # dims. - # (doesn't take much extra time since we are already going - # through these to remove unnecessary map dims) - - if d0 == d1: + if non_matching_int_dims_found or d0 == d1: continue elif d0 > d1: + # These ints inform us about the relative ordering of + # two statements. While their values may be larger than 1 in + # the lexicographic ordering describing a larger set of + # statements, in a pairwise schedule, only ints 0 and 1 are + # necessary to specify relative order. To keep the pairwise + # schedules as simple and comprehensible as possible, use only + # integers 0 and 1 to specify this relative ordering. 
+ # (doesn't take much extra time since we are already going + # through these to remove unnecessary lex tuple dims) new_tup0.append(1) new_tup1.append(0) + non_matching_int_dims_found = True else: # d1 > d0 new_tup0.append(0) new_tup1.append(1) + non_matching_int_dims_found = True else: # keep this dim new_tup0.append(d0) @@ -281,7 +278,8 @@ def _get_map_for_stmt_inst(insn_id, lex_points, int_sid, out_names_sched): lex_tup_before = _pad_tuple_with_zeros(lex_tup_before, max_lex_dims) lex_tup_after = _pad_tuple_with_zeros(lex_tup_after, max_lex_dims) - lex_tup_before, lex_tup_after = _simplify_dims(lex_tup_before, lex_tup_after) + lex_tup_before, lex_tup_after = _simplify_lex_dims( + lex_tup_before, lex_tup_after) # Now generate maps from the blueprint -------------------------------------- From 3dd9327d0ad604ba8e6ace69d9ddda0478695ed7 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Sun, 26 Jul 2020 20:33:19 -0500 Subject: [PATCH 128/460] make generate_pairwise_schedules() return a dictionary mapping (insn_id_before, insn_id_after) tuples to (schedule_before, schedule_after) tuples --- loopy/schedule/checker/__init__.py | 2 ++ loopy/schedule/checker/schedule.py | 4 +-- test/test_linearization_checker.py | 51 ++++++++++++------------------ 3 files changed, 24 insertions(+), 33 deletions(-) diff --git a/loopy/schedule/checker/__init__.py b/loopy/schedule/checker/__init__.py index fa9700b47..000265486 100644 --- a/loopy/schedule/checker/__init__.py +++ b/loopy/schedule/checker/__init__.py @@ -105,6 +105,8 @@ def get_schedules_for_statement_pairs( """ + # TODO update documentation + # {{{ make sure kernel has been preprocessed from loopy.kernel import KernelState diff --git a/loopy/schedule/checker/schedule.py b/loopy/schedule/checker/schedule.py index cd9443e67..7b98c621a 100644 --- a/loopy/schedule/checker/schedule.py +++ b/loopy/schedule/checker/schedule.py @@ -264,7 +264,7 @@ def _get_map_for_stmt_inst(insn_id, lex_points, int_sid, out_names_sched): space=sched_space, 
) - pairwise_schedules = [] + pairwise_schedules = {} for insn_id_before, insn_id_after in insn_id_pairs: lex_tup_before = stmt_instances[insn_id_before] lex_tup_after = stmt_instances[insn_id_after] @@ -296,6 +296,6 @@ def _get_map_for_stmt_inst(insn_id, lex_points, int_sid, out_names_sched): map_after = _get_map_for_stmt_inst( insn_id_after, lex_tup_after, int_sid_after, out_names_sched) - pairwise_schedules.append((map_before, map_after)) + pairwise_schedules[(insn_id_before, insn_id_after)] = (map_before, map_after) return pairwise_schedules diff --git a/test/test_linearization_checker.py b/test/test_linearization_checker.py index 716737bdd..a4657ba1f 100644 --- a/test/test_linearization_checker.py +++ b/test/test_linearization_checker.py @@ -112,14 +112,24 @@ def _lex_space_string(dim_vals): ["%s%d=%s" % (LEX_VAR_PREFIX, idx, str(val)) for idx, val in enumerate(dim_vals)]) + insn_id_pairs = [ + ("insn_a", "insn_b"), + ("insn_a", "insn_c"), + ("insn_a", "insn_d"), + ("insn_b", "insn_c"), + ("insn_b", "insn_d"), + ("insn_c", "insn_d"), + ] + sched_maps = get_schedules_for_statement_pairs( + knl, + linearization_items, + insn_id_pairs, + ) + # Relationship between insn_a and insn_b --------------------------------------- # Get two maps - sched_map_before, sched_map_after = get_schedules_for_statement_pairs( - knl, - linearization_items, - [("insn_a", "insn_b")], - )[0] + sched_map_before, sched_map_after = sched_maps[("insn_a", "insn_b")] # Create expected maps, align, compare @@ -150,11 +160,7 @@ def _lex_space_string(dim_vals): # Relationship between insn_a and insn_c --------------------------------------- # Get two maps - sched_map_before, sched_map_after = get_schedules_for_statement_pairs( - knl, - linearization_items, - [("insn_a", "insn_c")], - )[0] + sched_map_before, sched_map_after = sched_maps[("insn_a", "insn_c")] # Create expected maps, align, compare @@ -188,11 +194,7 @@ def _lex_space_string(dim_vals): # (i loop could be before or after t loop) 
def perform_insn_ad_checks_with(a_lex_idx, d_lex_idx): # Get two maps - sched_map_before, sched_map_after = get_schedules_for_statement_pairs( - knl, - linearization_items, - [("insn_a", "insn_d")], - )[0] + sched_map_before, sched_map_after = sched_maps[("insn_a", "insn_d")] # Create expected maps, align, compare @@ -230,14 +232,9 @@ def perform_insn_ad_checks_with(a_lex_idx, d_lex_idx): # Relationship between insn_b and insn_c --------------------------------------- # insn_b and insn_c could have been linearized in either order - # (i loop could be before or after t loop) def perform_insn_bc_checks_with(b_lex_idx, c_lex_idx): # Get two maps - sched_map_before, sched_map_after = get_schedules_for_statement_pairs( - knl, - linearization_items, - [("insn_b", "insn_c")], - )[0] + sched_map_before, sched_map_after = sched_maps[("insn_b", "insn_c")] # Create expected maps, align, compare @@ -278,11 +275,7 @@ def perform_insn_bc_checks_with(b_lex_idx, c_lex_idx): # (i loop could be before or after t loop) def perform_insn_bd_checks_with(b_lex_idx, d_lex_idx): # Get two maps - sched_map_before, sched_map_after = get_schedules_for_statement_pairs( - knl, - linearization_items, - [("insn_b", "insn_d")], - )[0] + sched_map_before, sched_map_after = sched_maps[("insn_b", "insn_d")] # Create expected maps, align, compare @@ -323,11 +316,7 @@ def perform_insn_bd_checks_with(b_lex_idx, d_lex_idx): # (i loop could be before or after t loop) def perform_insn_cd_checks_with(c_lex_idx, d_lex_idx): # Get two maps - sched_map_before, sched_map_after = get_schedules_for_statement_pairs( - knl, - linearization_items, - [("insn_c", "insn_d")], - )[0] + sched_map_before, sched_map_after = sched_maps[("insn_c", "insn_d")] # Create expected maps, align, compare From 67887d36ed9eb1b1a229833b4590cac030f7d2b1 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Sun, 26 Jul 2020 21:02:18 -0500 Subject: [PATCH 129/460] update sio test to deal with new output from get_schedules_for_statement_pairs(); 
don't hardcode expected lex order maps, instead create them to match expected dim size --- test/test_linearization_checker.py | 79 ++++++++++++++---------------- 1 file changed, 36 insertions(+), 43 deletions(-) diff --git a/test/test_linearization_checker.py b/test/test_linearization_checker.py index 7a1723d47..f081e2184 100644 --- a/test/test_linearization_checker.py +++ b/test/test_linearization_checker.py @@ -360,7 +360,7 @@ def perform_insn_cd_checks_with(c_lex_idx, d_lex_idx): def test_statement_instance_ordering_creation(): import islpy as isl from loopy.schedule.checker import ( - get_schedule_for_statement_pair, + get_schedules_for_statement_pairs, ) from loopy.schedule.checker.schedule import ( get_lex_order_map_for_sched_space, @@ -371,6 +371,7 @@ def test_statement_instance_ordering_creation(): ) from loopy.schedule.checker.lexicographic_order_map import ( get_statement_ordering_map, + create_lex_order_map, ) # example kernel (add deps to fix loop order) @@ -410,24 +411,44 @@ def test_statement_instance_ordering_creation(): knl = get_one_linearized_kernel(knl) linearization_items = knl.linearization + # Get pairwise schedules + insn_id_pairs = [ + ("insn_a", "insn_b"), + ("insn_a", "insn_c"), + ("insn_a", "insn_d"), + ("insn_b", "insn_c"), + ("insn_b", "insn_d"), + ("insn_c", "insn_d"), + ] + sched_maps = get_schedules_for_statement_pairs( + knl, + linearization_items, + insn_id_pairs, + ) + def check_sio_for_insn_pair( insn_id_before, insn_id_after, - expected_lex_order_map, + expected_lex_dims, expected_sio, ): # Get pairwise schedule - sched_map_before, sched_map_after = get_schedule_for_statement_pair( - knl, - linearization_items, - insn_id_before, - insn_id_after, - ) + sched_map_before, sched_map_after = sched_maps[ + (insn_id_before, insn_id_after)] - # get map representing lexicographic ordering + # Get map representing lexicographic ordering sched_lex_order_map = get_lex_order_map_for_sched_space(sched_map_before) + # Get expected lex order 
map + expected_lex_order_map = create_lex_order_map( + n_dims=expected_lex_dims, + before_names=["%s%d'" % (LEX_VAR_PREFIX, i) + for i in range(expected_lex_dims)], + after_names=["%s%d" % (LEX_VAR_PREFIX, i) + for i in range(expected_lex_dims)], + ) + assert sched_lex_order_map == expected_lex_order_map # create statement instance ordering, @@ -442,28 +463,6 @@ def check_sio_for_insn_pair( assert sio_aligned == expected_sio - expected_lex_order_map = isl.Map( - "{{ " - "[{0}0', {0}1', {0}2', {0}3', {0}4'] -> [{0}0, {0}1, {0}2, {0}3, {0}4] :" - "(" - "{0}0' < {0}0 " - ") or (" - "{0}0'={0}0 and {0}1' < {0}1 " - ") or (" - "{0}0'={0}0 and {0}1'={0}1 and {0}2' < {0}2 " - ") or (" - "{0}0'={0}0 and {0}1'={0}1 and {0}2'={0}2 and {0}3' < {0}3 " - ") or (" - "{0}0'={0}0 and {0}1'={0}1 and {0}2'={0}2 and {0}3'={0}3 and {0}4' < {0}4" - ")" - "}}".format(LEX_VAR_PREFIX)) - - # Isl ignores these apostrophes, but test would still pass since it ignores - # variable names when checking for equality. Even so, explicitly add apostrophes - # for sanity. 
- expected_lex_order_map = append_marker_to_isl_map_var_names( - expected_lex_order_map, isl.dim_type.in_, "'") - # Relationship between insn_a and insn_b --------------------------------------- expected_sio = isl.Map( @@ -478,8 +477,7 @@ def check_sio_for_insn_pair( expected_sio = append_marker_to_isl_map_var_names( expected_sio, isl.dim_type.in_, "'") - check_sio_for_insn_pair( - "insn_a", "insn_b", expected_lex_order_map, expected_sio) + check_sio_for_insn_pair("insn_a", "insn_b", 3, expected_sio) # Relationship between insn_a and insn_c --------------------------------------- @@ -495,8 +493,7 @@ def check_sio_for_insn_pair( expected_sio = append_marker_to_isl_map_var_names( expected_sio, isl.dim_type.in_, "'") - check_sio_for_insn_pair( - "insn_a", "insn_c", expected_lex_order_map, expected_sio) + check_sio_for_insn_pair("insn_a", "insn_c", 3, expected_sio) # Relationship between insn_a and insn_d --------------------------------------- @@ -510,8 +507,7 @@ def check_sio_for_insn_pair( expected_sio = append_marker_to_isl_map_var_names( expected_sio, isl.dim_type.in_, "'") - check_sio_for_insn_pair( - "insn_a", "insn_d", expected_lex_order_map, expected_sio) + check_sio_for_insn_pair("insn_a", "insn_d", 3, expected_sio) # Relationship between insn_b and insn_c --------------------------------------- @@ -529,8 +525,7 @@ def check_sio_for_insn_pair( expected_sio = append_marker_to_isl_map_var_names( expected_sio, isl.dim_type.in_, "'") - check_sio_for_insn_pair( - "insn_b", "insn_c", expected_lex_order_map, expected_sio) + check_sio_for_insn_pair("insn_b", "insn_c", 3, expected_sio) # Relationship between insn_b and insn_d --------------------------------------- @@ -544,8 +539,7 @@ def check_sio_for_insn_pair( expected_sio = append_marker_to_isl_map_var_names( expected_sio, isl.dim_type.in_, "'") - check_sio_for_insn_pair( - "insn_b", "insn_d", expected_lex_order_map, expected_sio) + check_sio_for_insn_pair("insn_b", "insn_d", 3, expected_sio) # Relationship 
between insn_c and insn_d --------------------------------------- @@ -559,8 +553,7 @@ def check_sio_for_insn_pair( expected_sio = append_marker_to_isl_map_var_names( expected_sio, isl.dim_type.in_, "'") - check_sio_for_insn_pair( - "insn_c", "insn_d", expected_lex_order_map, expected_sio) + check_sio_for_insn_pair("insn_c", "insn_d", 3, expected_sio) # }}} From 81dd0eee59b577edc58c41be83e425f110a2e1b3 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Sun, 26 Jul 2020 21:14:55 -0500 Subject: [PATCH 130/460] add independent test for lex order map creation --- test/test_linearization_checker.py | 61 ++++++++++++++++++++++++++++++ 1 file changed, 61 insertions(+) diff --git a/test/test_linearization_checker.py b/test/test_linearization_checker.py index f081e2184..6070909c5 100644 --- a/test/test_linearization_checker.py +++ b/test/test_linearization_checker.py @@ -355,6 +355,67 @@ def perform_insn_cd_checks_with(c_lex_idx, d_lex_idx): # }}} +# {{{ test lex order map creation + +def test_lex_order_map_creation(): + import islpy as isl + from loopy.schedule.checker.lexicographic_order_map import ( + create_lex_order_map, + ) + from loopy.schedule.checker.utils import ( + append_marker_to_isl_map_var_names, + ) + + def _check_lex_map(expected_lex_order_map, n_dims): + # Isl ignores the apostrophes, so explicitly add them + expected_lex_order_map = append_marker_to_isl_map_var_names( + expected_lex_order_map, isl.dim_type.in_, "'") + + lex_order_map = create_lex_order_map( + n_dims=n_dims, + before_names=["%s%d'" % (LEX_VAR_PREFIX, i) for i in range(n_dims)], + after_names=["%s%d" % (LEX_VAR_PREFIX, i) for i in range(n_dims)], + ) + + assert lex_order_map == expected_lex_order_map + assert ( + lex_order_map.get_var_names(isl.dim_type.in_) == + expected_lex_order_map.get_var_names(isl.dim_type.in_)) + assert ( + lex_order_map.get_var_names(isl.dim_type.out) == + expected_lex_order_map.get_var_names(isl.dim_type.out)) + + expected_lex_order_map = isl.Map( + "{{ " + "[{0}0', 
{0}1', {0}2', {0}3', {0}4'] -> [{0}0, {0}1, {0}2, {0}3, {0}4] :" + "(" + "{0}0' < {0}0 " + ") or (" + "{0}0'={0}0 and {0}1' < {0}1 " + ") or (" + "{0}0'={0}0 and {0}1'={0}1 and {0}2' < {0}2 " + ") or (" + "{0}0'={0}0 and {0}1'={0}1 and {0}2'={0}2 and {0}3' < {0}3 " + ") or (" + "{0}0'={0}0 and {0}1'={0}1 and {0}2'={0}2 and {0}3'={0}3 and {0}4' < {0}4" + ")" + "}}".format(LEX_VAR_PREFIX)) + + _check_lex_map(expected_lex_order_map, 5) + + expected_lex_order_map = isl.Map( + "{{ " + "[{0}0'] -> [{0}0] :" + "(" + "{0}0' < {0}0 " + ")" + "}}".format(LEX_VAR_PREFIX)) + + _check_lex_map(expected_lex_order_map, 1) + +# }}} + + # {{{ test statement instance ordering creation def test_statement_instance_ordering_creation(): From a4ff29d4f828f0b00983b57bd0ec90206f4478f5 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Mon, 27 Jul 2020 18:06:02 -0500 Subject: [PATCH 131/460] minor comment change --- loopy/schedule/checker/schedule.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/loopy/schedule/checker/schedule.py b/loopy/schedule/checker/schedule.py index 7b98c621a..65905862e 100644 --- a/loopy/schedule/checker/schedule.py +++ b/loopy/schedule/checker/schedule.py @@ -113,8 +113,7 @@ def generate_pairwise_schedules( # in the simplification step below) next_insn_lex_tuple[-1] = next_insn_lex_tuple[-1]+1 - # upon entering a loop, we enter a new (deeper) tier, - # add one lex dimension for the loop variable, + # Upon entering a loop, add one lex dimension for the loop variable, # add second lex dim to enumerate code blocks within new loop next_insn_lex_tuple.append(iname) next_insn_lex_tuple.append(0) From 86c3ff938be14495e4567d62dd9d86761e650009 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Sun, 2 Aug 2020 20:58:50 -0500 Subject: [PATCH 132/460] When simplifying pairs of lex tuples before map creation, once a dimension is found where both tuples have non-matching integer values, remove all faster-updating lex dimensions. 
--- loopy/schedule/checker/schedule.py | 30 +++++++++++++++++------------- test/test_linearization_checker.py | 20 ++++++++++---------- 2 files changed, 27 insertions(+), 23 deletions(-) diff --git a/loopy/schedule/checker/schedule.py b/loopy/schedule/checker/schedule.py index 65905862e..6044f7c7d 100644 --- a/loopy/schedule/checker/schedule.py +++ b/loopy/schedule/checker/schedule.py @@ -183,25 +183,24 @@ def _pad_tuple_with_zeros(tup, length): return tup[:] + tuple([0]*(length-len(tup))) def _simplify_lex_dims(tup0, tup1): - """Simplify pair of lex tuples in order to reduce the complexity of + """Simplify a pair of lex tuples in order to reduce the complexity of resulting maps. Remove lex tuple dimensions with matching integer values - since these do not provide information on relative ordering. For the same - reason, once a dimension is found where both tuples have non-matching integer - values, remove any faster-updating lex dimensions where both tuples have - integer values, even if the integers don't match. + since these do not provide information on relative ordering. Once a + dimension is found where both tuples have non-matching integer values, + remove any faster-updating lex dimensions since they are not necessary + to speficy relative ordering. """ - # TODO actually, once we find non-matching integer dims, we don't - # need *any* more lex dims to specify relative ordering. 
new_tup0 = [] new_tup1 = [] - non_matching_int_dims_found = False + # loop over dims for d0, d1 in zip(tup0, tup1): if isinstance(d0, int) and isinstance(d1, int): - # Both vals are ints for this dim - if non_matching_int_dims_found or d0 == d1: + # Both vals are ints for this dim + if d0 == d1: + # Do not keep this dim continue elif d0 > d1: # These ints inform us about the relative ordering of @@ -215,15 +214,20 @@ def _simplify_lex_dims(tup0, tup1): # through these to remove unnecessary lex tuple dims) new_tup0.append(1) new_tup1.append(0) - non_matching_int_dims_found = True + + # No further dims needed to fully specify ordering + break else: # d1 > d0 new_tup0.append(0) new_tup1.append(1) - non_matching_int_dims_found = True + + # No further dims needed to fully specify ordering + break else: - # keep this dim + # Keep this dim without modifying new_tup0.append(d0) new_tup1.append(d1) + return tuple(new_tup0), tuple(new_tup1) def _get_map_for_stmt_inst(insn_id, lex_points, int_sid, out_names_sched): diff --git a/test/test_linearization_checker.py b/test/test_linearization_checker.py index a4657ba1f..6d0fd3abf 100644 --- a/test/test_linearization_checker.py +++ b/test/test_linearization_checker.py @@ -137,7 +137,7 @@ def _lex_space_string(dim_vals): "[pi, pk] -> { [%s=0, i, k] -> [%s] : 0 <= i < pi and 0 <= k < pk }" % ( STATEMENT_VAR_NAME, - _lex_space_string(["i", "0", "k"]), + _lex_space_string(["i", "0"]), ) ) sched_map_before_expected = ensure_dim_names_match_and_align( @@ -147,7 +147,7 @@ def _lex_space_string(dim_vals): "[pi, pj] -> { [%s=1, i, j] -> [%s] : 0 <= i < pi and 0 <= j < pj }" % ( STATEMENT_VAR_NAME, - _lex_space_string(["i", "1", "j"]), + _lex_space_string(["i", "1"]), ) ) sched_map_after_expected = ensure_dim_names_match_and_align( @@ -168,7 +168,7 @@ def _lex_space_string(dim_vals): "[pi, pk] -> { [%s=0, i, k] -> [%s] : 0 <= i < pi and 0 <= k < pk }" % ( STATEMENT_VAR_NAME, - _lex_space_string(["i", "0", "k"]), + _lex_space_string(["i", 
"0"]), ) ) sched_map_before_expected = ensure_dim_names_match_and_align( @@ -178,7 +178,7 @@ def _lex_space_string(dim_vals): "[pi, pj] -> { [%s=1, i, j] -> [%s] : 0 <= i < pi and 0 <= j < pj }" % ( STATEMENT_VAR_NAME, - _lex_space_string(["i", "1", "j"]), + _lex_space_string(["i", "1"]), ) ) sched_map_after_expected = ensure_dim_names_match_and_align( @@ -202,7 +202,7 @@ def perform_insn_ad_checks_with(a_lex_idx, d_lex_idx): "[pi, pk] -> { [%s=0, i, k] -> [%s] : 0 <= i < pi and 0 <= k < pk }" % ( STATEMENT_VAR_NAME, - _lex_space_string([a_lex_idx, "i", "k"]), + _lex_space_string([a_lex_idx, ]), ) ) sched_map_before_expected = ensure_dim_names_match_and_align( @@ -212,7 +212,7 @@ def perform_insn_ad_checks_with(a_lex_idx, d_lex_idx): "[pt] -> { [%s=1, t] -> [%s] : 0 <= t < pt }" % ( STATEMENT_VAR_NAME, - _lex_space_string([d_lex_idx, "t", "0"]), + _lex_space_string([d_lex_idx, ]), ) ) sched_map_after_expected = ensure_dim_names_match_and_align( @@ -283,7 +283,7 @@ def perform_insn_bd_checks_with(b_lex_idx, d_lex_idx): "[pi, pj] -> { [%s=0, i, j] -> [%s] : 0 <= i < pi and 0 <= j < pj }" % ( STATEMENT_VAR_NAME, - _lex_space_string([b_lex_idx, "i", "j"]), + _lex_space_string([b_lex_idx, ]), ) ) sched_map_before_expected = ensure_dim_names_match_and_align( @@ -293,7 +293,7 @@ def perform_insn_bd_checks_with(b_lex_idx, d_lex_idx): "[pt] -> { [%s=1, t] -> [%s] : 0 <= t < pt }" % ( STATEMENT_VAR_NAME, - _lex_space_string([d_lex_idx, "t", "0"]), + _lex_space_string([d_lex_idx, ]), ) ) sched_map_after_expected = ensure_dim_names_match_and_align( @@ -324,7 +324,7 @@ def perform_insn_cd_checks_with(c_lex_idx, d_lex_idx): "[pi, pj] -> { [%s=0, i, j] -> [%s] : 0 <= i < pi and 0 <= j < pj }" % ( STATEMENT_VAR_NAME, - _lex_space_string([c_lex_idx, "i", "j"]), + _lex_space_string([c_lex_idx, ]), ) ) sched_map_before_expected = ensure_dim_names_match_and_align( @@ -334,7 +334,7 @@ def perform_insn_cd_checks_with(c_lex_idx, d_lex_idx): "[pt] -> { [%s=1, t] -> [%s] : 0 <= t < pt }" % 
( STATEMENT_VAR_NAME, - _lex_space_string([d_lex_idx, "t", "0"]), + _lex_space_string([d_lex_idx, ]), ) ) sched_map_after_expected = ensure_dim_names_match_and_align( From 39ec3c4ebfca2b02e36a8c94d2953d5a62aa2ea7 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Sun, 2 Aug 2020 21:26:25 -0500 Subject: [PATCH 133/460] instead of operating on Xbefore and Xafter pairs in pairs of statements, keep all pairs in lists (of length 2) and operate on the lists with loops --- loopy/schedule/checker/schedule.py | 32 +++++++++++++++--------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/loopy/schedule/checker/schedule.py b/loopy/schedule/checker/schedule.py index 6044f7c7d..769a690f2 100644 --- a/loopy/schedule/checker/schedule.py +++ b/loopy/schedule/checker/schedule.py @@ -268,37 +268,37 @@ def _get_map_for_stmt_inst(insn_id, lex_points, int_sid, out_names_sched): ) pairwise_schedules = {} - for insn_id_before, insn_id_after in insn_id_pairs: - lex_tup_before = stmt_instances[insn_id_before] - lex_tup_after = stmt_instances[insn_id_after] + for insn_ids in insn_id_pairs: + lex_tuples = [stmt_instances[insn_id] for insn_id in insn_ids] # simplify tuples to the extent possible ------------------------------------ # At this point, pairwise sub-schedule may contain lex point tuples # missing dimensions; the values in these missing dims should # be zero, so add them. 
- max_lex_dims = max(len(lex_tup_before), len(lex_tup_after)) - lex_tup_before = _pad_tuple_with_zeros(lex_tup_before, max_lex_dims) - lex_tup_after = _pad_tuple_with_zeros(lex_tup_after, max_lex_dims) + max_lex_dims = max([len(lex_tuple) for lex_tuple in lex_tuples]) + lex_tuples_padded = [ + _pad_tuple_with_zeros(lex_tuple, max_lex_dims) + for lex_tuple in lex_tuples] - lex_tup_before, lex_tup_after = _simplify_lex_dims( - lex_tup_before, lex_tup_after) + lex_tuples_simplified = _simplify_lex_dims(*lex_tuples_padded) # Now generate maps from the blueprint -------------------------------------- - out_names_sched = [LEX_VAR_PREFIX+str(i) for i in range(len(lex_tup_before))] + out_names_sched = [ + LEX_VAR_PREFIX+str(i) for i in range(len(lex_tuples_simplified[0]))] # Determine integer IDs that will represent each statement in mapping # (dependency map creation assumes sid_before=0 and sid_after=1, unless # before and after refer to same stmt, in which case sid_before=sid_after=0) - int_sid_before = 0 - int_sid_after = 0 if insn_id_before == insn_id_after else 1 + int_sids = [0, 0] if insn_ids[0] == insn_ids[1] else [0, 1] - map_before = _get_map_for_stmt_inst( - insn_id_before, lex_tup_before, int_sid_before, out_names_sched) - map_after = _get_map_for_stmt_inst( - insn_id_after, lex_tup_after, int_sid_after, out_names_sched) + sched_maps = [ + _get_map_for_stmt_inst(insn_id, lex_tuple, int_sid, out_names_sched) + for insn_id, lex_tuple, int_sid + in zip(insn_ids, lex_tuples_simplified, int_sids) + ] - pairwise_schedules[(insn_id_before, insn_id_after)] = (map_before, map_after) + pairwise_schedules[tuple(insn_ids)] = tuple(sched_maps) return pairwise_schedules From 5f060a84d96cf960c50a528b0b37b18ec355c170 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Sun, 2 Aug 2020 21:32:55 -0500 Subject: [PATCH 134/460] reduce the number of dims expected in lex maps after update that simplified lex maps --- test/test_linearization_checker.py | 10 +++++----- 1 file changed, 5 
insertions(+), 5 deletions(-) diff --git a/test/test_linearization_checker.py b/test/test_linearization_checker.py index a3a95b624..bf33bebb2 100644 --- a/test/test_linearization_checker.py +++ b/test/test_linearization_checker.py @@ -538,7 +538,7 @@ def check_sio_for_insn_pair( expected_sio = append_marker_to_isl_map_var_names( expected_sio, isl.dim_type.in_, "'") - check_sio_for_insn_pair("insn_a", "insn_b", 3, expected_sio) + check_sio_for_insn_pair("insn_a", "insn_b", 2, expected_sio) # Relationship between insn_a and insn_c --------------------------------------- @@ -554,7 +554,7 @@ def check_sio_for_insn_pair( expected_sio = append_marker_to_isl_map_var_names( expected_sio, isl.dim_type.in_, "'") - check_sio_for_insn_pair("insn_a", "insn_c", 3, expected_sio) + check_sio_for_insn_pair("insn_a", "insn_c", 2, expected_sio) # Relationship between insn_a and insn_d --------------------------------------- @@ -568,7 +568,7 @@ def check_sio_for_insn_pair( expected_sio = append_marker_to_isl_map_var_names( expected_sio, isl.dim_type.in_, "'") - check_sio_for_insn_pair("insn_a", "insn_d", 3, expected_sio) + check_sio_for_insn_pair("insn_a", "insn_d", 1, expected_sio) # Relationship between insn_b and insn_c --------------------------------------- @@ -600,7 +600,7 @@ def check_sio_for_insn_pair( expected_sio = append_marker_to_isl_map_var_names( expected_sio, isl.dim_type.in_, "'") - check_sio_for_insn_pair("insn_b", "insn_d", 3, expected_sio) + check_sio_for_insn_pair("insn_b", "insn_d", 1, expected_sio) # Relationship between insn_c and insn_d --------------------------------------- @@ -614,7 +614,7 @@ def check_sio_for_insn_pair( expected_sio = append_marker_to_isl_map_var_names( expected_sio, isl.dim_type.in_, "'") - check_sio_for_insn_pair("insn_c", "insn_d", 3, expected_sio) + check_sio_for_insn_pair("insn_c", "insn_d", 1, expected_sio) # }}} From 6bb69899730e940ce5e462500cc40827ec23261f Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Sun, 2 Aug 2020 21:34:24 -0500 
Subject: [PATCH 135/460] fix flake8 issue --- loopy/schedule/checker/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/loopy/schedule/checker/utils.py b/loopy/schedule/checker/utils.py index bad083558..3bde5247b 100644 --- a/loopy/schedule/checker/utils.py +++ b/loopy/schedule/checker/utils.py @@ -140,7 +140,7 @@ def ensure_dim_names_match_and_align(obj_map, tgt_map): def sorted_union_of_names_in_isl_sets( isl_sets, set_dim=isl.dim_type.set): - """Return a sorted list of the union of all variable names found in + r"""Return a sorted list of the union of all variable names found in the provided :class:`islpy.Set`\ s. """ From 41654850aac4b18394883c6f3607076b74b10021 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Sun, 2 Aug 2020 22:25:56 -0500 Subject: [PATCH 136/460] remove get_islvars_from_space(); instead use isl.affs_from_space() --- loopy/schedule/checker/utils.py | 21 ++++++++------------- 1 file changed, 8 insertions(+), 13 deletions(-) diff --git a/loopy/schedule/checker/utils.py b/loopy/schedule/checker/utils.py index 3bde5247b..019117231 100644 --- a/loopy/schedule/checker/utils.py +++ b/loopy/schedule/checker/utils.py @@ -28,18 +28,6 @@ def prettier_map_string(map_obj): ).replace("{ ", "{\n").replace(" }", "\n}").replace("; ", ";\n") -def get_islvars_from_space(space): - #pu.db - param_names = space.get_var_names(isl.dim_type.param) - in_names = space.get_var_names(isl.dim_type.in_) - out_names = space.get_var_names(isl.dim_type.out) - return isl.make_zero_and_vars(in_names+out_names, param_names) - #old = isl.make_zero_and_vars(in_names+out_names, param_names) - #new = isl.affs_from_space(space) - #assert old == new - #return new - - def add_dims_to_isl_set(isl_set, dim_type, names, new_idx_start): new_set = isl_set.insert_dims( dim_type, new_idx_start, len(names) @@ -183,7 +171,14 @@ def create_symbolic_map_from_tuples( space_out_names = space.get_var_names(dim_type.out) space_in_names = space.get_var_names(isl.dim_type.in_) - 
islvars = get_islvars_from_space(space) + # get islvars from space + islvars = isl.affs_from_space( + space.move_dims( + isl.dim_type.out, 0, + isl.dim_type.in_, 0, + len(space_in_names), + ).range() + ) # loop through pairs and create a set that will later be converted to a map From 3f4b3b8fbf0d057e317d48eb4164eaedc86f3c94 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Tue, 4 Aug 2020 01:41:09 -0500 Subject: [PATCH 137/460] make example usage of get_schedules_for_statement_pairs() into a doctest; probably the wrong way to do this... (but it's not time to add this to tutorial yet) --- loopy/schedule/checker/__init__.py | 73 +++++++++++------------------- 1 file changed, 27 insertions(+), 46 deletions(-) diff --git a/loopy/schedule/checker/__init__.py b/loopy/schedule/checker/__init__.py index 000265486..d28c2f676 100644 --- a/loopy/schedule/checker/__init__.py +++ b/loopy/schedule/checker/__init__.py @@ -56,52 +56,33 @@ def get_schedules_for_statement_pairs( from statement instances to lexicographic time, one for each of the two statements. - Example usage:: - - # Make kernel ------------------------------------------------------------ - knl = lp.make_kernel( - "{[i,j,k]: 0<=i { - [_lp_linchk_statement = 0, i, j, k] -> - [_lp_linchk_l0 = 0, _lp_linchk_l1 = i, _lp_linchk_l2 = 0, - _lp_linchk_l3 = j, _lp_linchk_l4 = 0] : - 0 <= i < pi and 0 <= j < pj and 0 <= k < pk } - [pi, pj, pk] -> { - [_lp_linchk_statement = 1, i, j, k] -> - [_lp_linchk_l0 = 0, _lp_linchk_l1 = i, _lp_linchk_l2 = 1, - _lp_linchk_l3 = k, _lp_linchk_l4 = 0] : - 0 <= i < pi and 0 <= j < pj and 0 <= k < pk } + .. doctest: + + >>> import loopy as lp + >>> import numpy as np + >>> # Make kernel ----------------------------------------------------------- + >>> knl = lp.make_kernel( + ... 
"{[i,j,k]: 0<=i>> knl = lp.add_and_infer_dtypes(knl, {"a": np.float32, "b": np.float32}) + >>> knl = lp.prioritize_loops(knl, "i,j") + >>> knl = lp.prioritize_loops(knl, "i,k") + >>> # Get a linearization + >>> knl = lp.get_one_linearized_kernel(lp.preprocess_kernel(knl)) + >>> # Get a pairwise schedule ----------------------------------------------- + >>> from loopy.schedule.checker import get_schedules_for_statement_pairs + >>> # Get two maps ---------------------------------------------------------- + >>> schedules = get_schedules_for_statement_pairs( + ... knl, + ... knl.linearization, + ... [("insn_a", "insn_b")], + ... ) + >>> print(*schedules[("insn_a", "insn_b")], sep="\n") + [pi, pj, pk] -> { [_lp_linchk_statement = 0, i, j, k] -> [_lp_linchk_l0 = i, _lp_linchk_l1 = 0] : 0 <= i < pi and 0 <= j < pj and 0 <= k < pk } + [pi, pj, pk] -> { [_lp_linchk_statement = 1, i, j, k] -> [_lp_linchk_l0 = i, _lp_linchk_l1 = 1] : 0 <= i < pi and 0 <= j < pj and 0 <= k < pk } """ From de5ab4480058c797d09830102e70a9a402053f9b Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Tue, 4 Aug 2020 02:37:14 -0500 Subject: [PATCH 138/460] update all docstrings/comments after recent changes --- loopy/schedule/checker/__init__.py | 31 ++++---- loopy/schedule/checker/schedule.py | 115 +++++++++++++++-------------- loopy/schedule/checker/utils.py | 41 +++++----- 3 files changed, 94 insertions(+), 93 deletions(-) diff --git a/loopy/schedule/checker/__init__.py b/loopy/schedule/checker/__init__.py index d28c2f676..269e7ba05 100644 --- a/loopy/schedule/checker/__init__.py +++ b/loopy/schedule/checker/__init__.py @@ -28,32 +28,31 @@ def get_schedules_for_statement_pairs( linearization_items, insn_id_pairs, ): - r"""Given a pair of statements in a linearized kernel, determine - the (relative) order in which the instances are executed, - by creating a mapping from statement instances to points in a single - lexicographic ordering. 
Create a pair of :class:`islpy.Map`\ s - representing a pairwise schedule as two mappings from statement instances - to lexicographic time. + r"""For each statement pair in a subset of all statement pairs found in a + linearized kernel, determine the (relative) order in which the statement + instances are executed. For each pair, describe this relative ordering with + a pair of mappings from statement instances to points in a single + lexicographic ordering (a ``pairwise schedule''). When determining the + relative ordering, ignore concurrent inames. :arg knl: A preprocessed :class:`loopy.kernel.LoopKernel` containing the linearization items that will be used to create a schedule. :arg linearization_items: A list of :class:`loopy.schedule.ScheduleItem` (to be renamed to `loopy.schedule.LinearizationItem`) containing - the two linearization items for which a schedule will be + all linearization items for which pairwise schedules will be created. This list may be a *partial* linearization for a kernel since this function may be used during the linearization process. - :arg insn_id_before: An instruction identifier that is unique within - a :class:`loopy.kernel.LoopKernel`. + :arg insn_id_pairs: A list of two-tuples containing pairs of instruction + identifiers, each of which is unique within a + :class:`loopy.kernel.LoopKernel`. - :arg insn_id_after: An instruction identifier that is unique within - a :class:`loopy.kernel.LoopKernel`. - - :returns: A two-tuple containing two :class:`islpy.Map`s - representing the a pairwise schedule as two mappings - from statement instances to lexicographic time, one for + :returns: A dictionary mapping each two-tuple of instruction identifiers + provided in `insn_id_pairs` to a corresponding two-tuple containing two + :class:`islpy.Map`\ s representing a pairwise schedule as two + mappings from statement instances to lexicographic time, one for each of the two statements. .. 
doctest: @@ -86,8 +85,6 @@ def get_schedules_for_statement_pairs( """ - # TODO update documentation - # {{{ make sure kernel has been preprocessed from loopy.kernel import KernelState diff --git a/loopy/schedule/checker/schedule.py b/loopy/schedule/checker/schedule.py index 769a690f2..724053e59 100644 --- a/loopy/schedule/checker/schedule.py +++ b/loopy/schedule/checker/schedule.py @@ -53,53 +53,56 @@ def generate_pairwise_schedules( insn_id_pairs, loops_to_ignore=set(), ): - r"""Given a pair of statements in a linearized kernel, determine - the (relative) order in which the instances are executed, - by creating a mapping from statement instances to points in a single - lexicographic ordering. Create a pair of :class:`islpy.Map`\ s - representing a pairwise schedule as two mappings from statement instances - to lexicographic time. - - :arg knl: A :class:`loopy.kernel.LoopKernel` containing the - linearization items that will be described by the schedule. This + r"""For each statement pair in a subset of all statement pairs found in a + linearized kernel, determine the (relative) order in which the statement + instances are executed. For each pair, describe this relative ordering with + a pair of mappings from statement instances to points in a single + lexicographic ordering (a ``pairwise schedule''). + + :arg knl: A preprocessed :class:`loopy.kernel.LoopKernel` containing the + linearization items that will be used to create a schedule. This kernel will be used to get the domains associated with the inames used in the statements. :arg linearization_items: A list of :class:`loopy.schedule.ScheduleItem` - (to be renamed to `loopy.schedule.LinearizationItem`) including the - two linearization items whose relative order will be described by the - schedule. This list may be a *partial* linearization for a kernel since - this function may be used during the linearization process. 
- - :arg before_insn_id: A :class:`str` instruction id specifying - stmt_instance_set_before in this pair of instructions. - - :arg after_insn_id: A :class:`str` instruction id specifying - stmt_instance_set_after in this pair of instructions. - - :returns: A two-tuple containing two :class:`islpy.Map`\ s - representing a pairwise schedule as two mappings - from statement instances to lexicographic time, one for + (to be renamed to `loopy.schedule.LinearizationItem`) containing + all linearization items for which pairwise schedules will be + created. This list may be a *partial* linearization for a + kernel since this function may be used during the linearization + process. + + :arg insn_id_pairs: A list of two-tuples containing pairs of instruction + identifiers, each of which is unique within a + :class:`loopy.kernel.LoopKernel`. + + :arg loops_to_ignore: A set of inames that will be ignored when + determining the relative ordering of statements. This will typically + contain concurrent inames. + + :returns: A dictionary mapping each two-tuple of instruction identifiers + provided in `insn_id_pairs` to a corresponding two-tuple containing two + :class:`islpy.Map`\ s representing a pairwise schedule as two + mappings from statement instances to lexicographic time, one for each of the two statements. """ - # TODO - # update documentation + from loopy.schedule import (EnterLoop, LeaveLoop, Barrier, RunInstruction) all_insn_ids = set().union(*insn_id_pairs) + # First, use one pass through linearization_items to generate a lexicographic + # ordering describing the relative order of *all* statements represented by + # all_insn_ids + # For each statement, map the insn_id to a tuple representing points # in the lexicographic ordering containing items of :class:`int` or # :class:`str` :mod:`loopy` inames. 
stmt_instances = {} - from loopy.schedule import (EnterLoop, LeaveLoop, Barrier, RunInstruction) - - # go through linearization_items and generate pairwise sub-schedule - - # keep track of the next tuple of points in our lexicographic + # Keep track of the next tuple of points in our lexicographic # ordering, initially this as a 1-d point with value 0 next_insn_lex_tuple = [0] + for linearization_item in linearization_items: if isinstance(linearization_item, EnterLoop): iname = linearization_item.iname @@ -109,12 +112,12 @@ def generate_pairwise_schedules( # Increment next_insn_lex_tuple[-1] for statements in the section # of code after this EnterLoop. # (not technically necessary if no statement was added in the - # previous section; gratuitious incrementing is counteracted + # previous section; gratuitous incrementing is counteracted # in the simplification step below) next_insn_lex_tuple[-1] = next_insn_lex_tuple[-1]+1 # Upon entering a loop, add one lex dimension for the loop variable, - # add second lex dim to enumerate code blocks within new loop + # add second lex dim to enumerate sections of code within new loop next_insn_lex_tuple.append(iname) next_insn_lex_tuple.append(0) @@ -122,17 +125,17 @@ def generate_pairwise_schedules( if linearization_item.iname in loops_to_ignore: continue - # upon leaving a loop, - # pop lex dimension for enumerating code blocks within this loop, and + # Upon leaving a loop, + # pop lex dimension for enumerating code sections within this loop, and # pop lex dimension for the loop variable, and - # increment lex dim val enumerating items in current code block + # increment lex dim val enumerating items in current section of code next_insn_lex_tuple.pop() next_insn_lex_tuple.pop() # Increment next_insn_lex_tuple[-1] for statements in the section # of code after this LeaveLoop. 
# (not technically necessary if no statement was added in the - # previous section; gratuitious incrementing is counteracted + # previous section; gratuitous incrementing is counteracted # in the simplification step below) next_insn_lex_tuple[-1] = next_insn_lex_tuple[-1]+1 @@ -154,23 +157,23 @@ def generate_pairwise_schedules( continue - # only process listed insns, otherwise ignore + # Only process listed insns, otherwise ignore if lp_insn_id in all_insn_ids: - # add item + # Add item to stmt_instances stmt_instances[lp_insn_id] = tuple(next_insn_lex_tuple[:]) - # increment lex dim val enumerating items in current code block + # Increment lex dim val enumerating items in current section of code next_insn_lex_tuple[-1] = next_insn_lex_tuple[-1] + 1 else: from loopy.schedule import (CallKernel, ReturnFromKernel) - # no action needed for these types of linearization item + # No action needed for these types of linearization item assert isinstance( linearization_item, (CallKernel, ReturnFromKernel)) pass - # to save time, stop when we've created all statements - if len(stmt_instances.keys()) == all_insn_ids: + # To save time, stop when we've found all statements + if len(stmt_instances.keys()) == len(all_insn_ids): break from loopy.schedule.checker.utils import ( @@ -179,8 +182,8 @@ def generate_pairwise_schedules( add_dims_to_isl_set, ) - def _pad_tuple_with_zeros(tup, length): - return tup[:] + tuple([0]*(length-len(tup))) + def _pad_tuple_with_zeros(tup, desired_length): + return tup[:] + tuple([0]*(desired_length-len(tup))) def _simplify_lex_dims(tup0, tup1): """Simplify a pair of lex tuples in order to reduce the complexity of @@ -188,13 +191,13 @@ def _simplify_lex_dims(tup0, tup1): since these do not provide information on relative ordering. Once a dimension is found where both tuples have non-matching integer values, remove any faster-updating lex dimensions since they are not necessary - to speficy relative ordering. + to specify a relative ordering. 
""" new_tup0 = [] new_tup1 = [] - # loop over dims + # Loop over dims from slowest updating to fastest for d0, d1 in zip(tup0, tup1): if isinstance(d0, int) and isinstance(d1, int): @@ -236,8 +239,8 @@ def _get_map_for_stmt_inst(insn_id, lex_points, int_sid, out_names_sched): dom = knl.get_inames_domain( knl.id_to_insn[insn_id].within_inames) - # create space (an isl space in current implementation) - # {('statement', used in statement domain>) -> + # Create map space (an isl space in current implementation) + # {('statement', ) -> # (lexicographic ordering dims)} dom_inames_ordered = sorted_union_of_names_in_isl_sets([dom]) @@ -252,8 +255,7 @@ def _get_map_for_stmt_inst(insn_id, lex_points, int_sid, out_names_sched): add_dims_to_isl_set( dom, isl.dim_type.set, [STATEMENT_VAR_NAME], 0), ] - # Each map representing the schedule will map - # statement instances -> lex time. + # Each map will map statement instances -> lex time. # Right now, statement instance tuples consist of single int. # Add all inames from domains to each map domain tuple. tuple_pair = [( @@ -261,21 +263,23 @@ def _get_map_for_stmt_inst(insn_id, lex_points, int_sid, out_names_sched): lex_points )] - # create map + # Create map return create_symbolic_map_from_tuples( tuple_pairs_with_domains=zip(tuple_pair, dom_to_intersect), space=sched_space, ) + # Second, create pairwise schedules for each individual pair of insns + pairwise_schedules = {} for insn_ids in insn_id_pairs: lex_tuples = [stmt_instances[insn_id] for insn_id in insn_ids] - # simplify tuples to the extent possible ------------------------------------ + # Simplify tuples to the extent possible ------------------------------------ - # At this point, pairwise sub-schedule may contain lex point tuples - # missing dimensions; the values in these missing dims should - # be zero, so add them. 
+ # At this point, one of the lex tuples may have more dimensions than another; + # the missing dims are the fastest-updating dims, and their values should + # be zero. Add them. max_lex_dims = max([len(lex_tuple) for lex_tuple in lex_tuples]) lex_tuples_padded = [ _pad_tuple_with_zeros(lex_tuple, max_lex_dims) @@ -285,6 +289,7 @@ def _get_map_for_stmt_inst(insn_id, lex_points, int_sid, out_names_sched): # Now generate maps from the blueprint -------------------------------------- + # Create names for the output dimensions out_names_sched = [ LEX_VAR_PREFIX+str(i) for i in range(len(lex_tuples_simplified[0]))] diff --git a/loopy/schedule/checker/utils.py b/loopy/schedule/checker/utils.py index 019117231..10ccc7191 100644 --- a/loopy/schedule/checker/utils.py +++ b/loopy/schedule/checker/utils.py @@ -44,6 +44,9 @@ def map_names_match_check( assert_subset=True, assert_permutation=True, ): + """Raise an error if names of the specified map dimension do not match + the desired names + """ obj_map_names = obj_map.space.get_var_names(dim_type) if assert_permutation: @@ -89,23 +92,19 @@ def insert_missing_dims_and_reorder_by_name( new_set = isl_set.copy() for desired_idx, name in enumerate(desired_dims_ordered): - # if iname doesn't exist in set, add dim: + # If iname doesn't exist in set, add dim if name not in new_set.get_var_names(dim_type): - # insert missing dim in correct location + # Insert missing dim in correct location new_set = new_set.insert_dims( dim_type, desired_idx, 1 - ).set_dim_name( - dim_type, desired_idx, name) - else: # iname exists in set + ).set_dim_name(dim_type, desired_idx, name) + else: # Iname exists in set current_idx = new_set.find_dim_by_name(dim_type, name) if current_idx != desired_idx: - # move_dims(dst_type, dst_idx, src_type, src_idx, n) - - # first move to other dim because isl is stupid + # First move to other dim because isl is stupid new_set = new_set.move_dims( other_dim_type, other_dim_len, dim_type, current_idx, 1) - - # now move 
it where we actually want it + # Now move it where we actually want it new_set = new_set.move_dims( dim_type, desired_idx, other_dim_type, other_dim_len, 1) @@ -134,7 +133,7 @@ def sorted_union_of_names_in_isl_sets( inames = set().union(*[isl_set.get_var_names(set_dim) for isl_set in isl_sets]) - # sorting is not necessary, but keeps results consistent between runs + # Sorting is not necessary, but keeps results consistent between runs return sorted(inames) @@ -171,7 +170,7 @@ def create_symbolic_map_from_tuples( space_out_names = space.get_var_names(dim_type.out) space_in_names = space.get_var_names(isl.dim_type.in_) - # get islvars from space + # Get islvars from space islvars = isl.affs_from_space( space.move_dims( isl.dim_type.out, 0, @@ -180,8 +179,6 @@ def create_symbolic_map_from_tuples( ).range() ) - # loop through pairs and create a set that will later be converted to a map - def _conjunction_of_dim_eq_conditions(dim_names, values, islvars): condition = islvars[0].eq_set(islvars[0]) for dim_name, val in zip(dim_names, values): @@ -193,22 +190,24 @@ def _conjunction_of_dim_eq_conditions(dim_names, values, islvars): & islvars[dim_name].eq_set(islvars[val]) return condition - # initialize union to empty + # Initialize union of maps to empty union_of_maps = isl.Map.from_domain( islvars[0].eq_set(islvars[0]+1) # 0 == 1 (false) ).move_dims( dim_type.out, 0, dim_type.in_, len(space_in_names), len(space_out_names)) + + # Loop through tuple pairs for (tup_in, tup_out), dom in tuple_pairs_with_domains: - # set values for 'in' dimension using tuple vals + # Set values for 'in' dimension using tuple vals condition = _conjunction_of_dim_eq_conditions( space_in_names, tup_in, islvars) - # set values for 'out' dimension using tuple vals + # Set values for 'out' dimension using tuple vals condition = condition & _conjunction_of_dim_eq_conditions( space_out_names, tup_out, islvars) - # convert set to map by moving dimensions around + # Convert set to map by moving 
dimensions around map_from_set = isl.Map.from_domain(condition) map_from_set = map_from_set.move_dims( dim_type.out, 0, dim_type.in_, @@ -217,15 +216,15 @@ def _conjunction_of_dim_eq_conditions(dim_names, values, islvars): assert space_in_names == map_from_set.get_var_names( isl.dim_type.in_) - # if there are any dimensions in dom that are missing from + # If there are any dimensions in dom that are missing from # map_from_set, we have a problem I think? - # (assertion checks this in add_missing... + # (assertion checks this in add_missing...) dom_with_all_inames = insert_missing_dims_and_reorder_by_name( dom, isl.dim_type.set, space_in_names, ) - # intersect domain with this map + # Intersect domain with this map union_of_maps = union_of_maps.union( map_from_set.intersect_domain(dom_with_all_inames)) From 5551fd9ba664e99042205bf6564d540c8b6781c6 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Tue, 4 Aug 2020 02:38:41 -0500 Subject: [PATCH 139/460] remove unused arg (knl) from get_EnterLoop_inames() --- loopy/schedule/checker/__init__.py | 2 +- loopy/schedule/checker/utils.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/loopy/schedule/checker/__init__.py b/loopy/schedule/checker/__init__.py index 269e7ba05..09ffd6bde 100644 --- a/loopy/schedule/checker/__init__.py +++ b/loopy/schedule/checker/__init__.py @@ -104,7 +104,7 @@ def get_schedules_for_statement_pairs( get_EnterLoop_inames, ) conc_inames, _ = partition_inames_by_concurrency(knl) - enterloop_inames = get_EnterLoop_inames(linearization_items, knl) + enterloop_inames = get_EnterLoop_inames(linearization_items) conc_loop_inames = conc_inames & enterloop_inames if conc_loop_inames: from warnings import warn diff --git a/loopy/schedule/checker/utils.py b/loopy/schedule/checker/utils.py index 10ccc7191..23880bbfa 100644 --- a/loopy/schedule/checker/utils.py +++ b/loopy/schedule/checker/utils.py @@ -254,7 +254,7 @@ def get_insn_id_from_linearization_item(linearization_item): return 
linearization_item.insn_id -def get_EnterLoop_inames(linearization_items, knl): +def get_EnterLoop_inames(linearization_items): from loopy.schedule import EnterLoop loop_inames = set() for linearization_item in linearization_items: From 222b0c729b07936d13b72763859d41b23857f8b7 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Tue, 4 Aug 2020 02:56:05 -0500 Subject: [PATCH 140/460] use list comprehension instead of loop in get_EnterLoop_inames() --- loopy/schedule/checker/utils.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/loopy/schedule/checker/utils.py b/loopy/schedule/checker/utils.py index 23880bbfa..b09c9fb16 100644 --- a/loopy/schedule/checker/utils.py +++ b/loopy/schedule/checker/utils.py @@ -256,8 +256,9 @@ def get_insn_id_from_linearization_item(linearization_item): def get_EnterLoop_inames(linearization_items): from loopy.schedule import EnterLoop - loop_inames = set() - for linearization_item in linearization_items: - if isinstance(linearization_item, EnterLoop): - loop_inames.add(linearization_item.iname) - return loop_inames + + # Note: each iname must live in len-1 list to avoid char separation + return set().union(*[ + [item.iname, ] for item in linearization_items + if isinstance(item, EnterLoop) + ]) From f1e31d52ffe97d10583469fd4fe870f5a6ae2429 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Tue, 4 Aug 2020 03:11:10 -0500 Subject: [PATCH 141/460] temporarily add an assert-false to check for unnecessary functionality --- loopy/schedule/checker/utils.py | 1 + 1 file changed, 1 insertion(+) diff --git a/loopy/schedule/checker/utils.py b/loopy/schedule/checker/utils.py index b09c9fb16..babb2b24a 100644 --- a/loopy/schedule/checker/utils.py +++ b/loopy/schedule/checker/utils.py @@ -94,6 +94,7 @@ def insert_missing_dims_and_reorder_by_name( for desired_idx, name in enumerate(desired_dims_ordered): # If iname doesn't exist in set, add dim if name not in new_set.get_var_names(dim_type): + assert False # Insert missing dim in 
correct location new_set = new_set.insert_dims( dim_type, desired_idx, 1 From 8001bd61ad9c3a77c5c9a781bb856c5cf331e66c Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Tue, 4 Aug 2020 03:35:12 -0500 Subject: [PATCH 142/460] handle special case where simplified lex tuples are empty (means statements map to the exact same point(s) in the lex ordering, which is okay, but to represent this, our lex tuple cannot be empty, so map to (0)) --- loopy/schedule/checker/schedule.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/loopy/schedule/checker/schedule.py b/loopy/schedule/checker/schedule.py index 724053e59..1e6e30fb7 100644 --- a/loopy/schedule/checker/schedule.py +++ b/loopy/schedule/checker/schedule.py @@ -231,7 +231,12 @@ def _simplify_lex_dims(tup0, tup1): new_tup0.append(d0) new_tup1.append(d1) - return tuple(new_tup0), tuple(new_tup1) + if len(new_tup0) == 0: + # Statements map to the exact same point(s) in the lex ordering, + # which is okay, but to represent this, our lex tuple cannot be empty. 
+ return (0, ), (0, ) + else: + return tuple(new_tup0), tuple(new_tup1) def _get_map_for_stmt_inst(insn_id, lex_points, int_sid, out_names_sched): From 567c40545d5c77731d8cc3a097ee72cc3d8e12f9 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Tue, 4 Aug 2020 04:53:05 -0500 Subject: [PATCH 143/460] don't add missing dims when aligning domain for intersection in create_symbolic_map_from_tuples(), all dims will always be present by construction; rename insert_missing_dims_and_reorder_by_name()->reorder_dims_by_name() --- loopy/schedule/checker/utils.py | 46 ++++++++++++--------------------- 1 file changed, 17 insertions(+), 29 deletions(-) diff --git a/loopy/schedule/checker/utils.py b/loopy/schedule/checker/utils.py index babb2b24a..3a2c7c682 100644 --- a/loopy/schedule/checker/utils.py +++ b/loopy/schedule/checker/utils.py @@ -61,12 +61,12 @@ def map_names_match_check( % (obj_map_names, dim_type, desired_names)) -def insert_missing_dims_and_reorder_by_name( +def reorder_dims_by_name( isl_set, dim_type, desired_dims_ordered): """Return an isl_set with the dimensions in the specified order. :arg isl_set: A :class:`islpy.Set` whose dimensions are - to be reordered and, if necessary, augmented with missing dimensions. + to be reordered. :arg dim_type: A :class:`islpy.dim_type`, i.e., an :class:`int`, specifying the dimension to be reordered. @@ -75,9 +75,7 @@ def insert_missing_dims_and_reorder_by_name( representing the desired dimensions in order by dimension name. :returns: An :class:`islpy.Set` matching `isl_set` with the - dimension order matching `desired_dims_ordered`, - including additional dimensions present in `desred_dims_ordered` - that are not present in `isl_set`. + dimension order matching `desired_dims_ordered`. 
""" @@ -92,22 +90,16 @@ def insert_missing_dims_and_reorder_by_name( new_set = isl_set.copy() for desired_idx, name in enumerate(desired_dims_ordered): - # If iname doesn't exist in set, add dim - if name not in new_set.get_var_names(dim_type): - assert False - # Insert missing dim in correct location - new_set = new_set.insert_dims( - dim_type, desired_idx, 1 - ).set_dim_name(dim_type, desired_idx, name) - else: # Iname exists in set - current_idx = new_set.find_dim_by_name(dim_type, name) - if current_idx != desired_idx: - # First move to other dim because isl is stupid - new_set = new_set.move_dims( - other_dim_type, other_dim_len, dim_type, current_idx, 1) - # Now move it where we actually want it - new_set = new_set.move_dims( - dim_type, desired_idx, other_dim_type, other_dim_len, 1) + assert name in new_set.get_var_names(dim_type) + + current_idx = new_set.find_dim_by_name(dim_type, name) + if current_idx != desired_idx: + # First move to other dim because isl is stupid + new_set = new_set.move_dims( + other_dim_type, other_dim_len, dim_type, current_idx, 1) + # Now move it where we actually want it + new_set = new_set.move_dims( + dim_type, desired_idx, other_dim_type, other_dim_len, 1) return new_set @@ -214,20 +206,16 @@ def _conjunction_of_dim_eq_conditions(dim_names, values, islvars): dim_type.out, 0, dim_type.in_, len(space_in_names), len(space_out_names)) - assert space_in_names == map_from_set.get_var_names( - isl.dim_type.in_) - - # If there are any dimensions in dom that are missing from - # map_from_set, we have a problem I think? - # (assertion checks this in add_missing...) 
- dom_with_all_inames = insert_missing_dims_and_reorder_by_name( + # Align the *out* dims of dom with the space *in_* dims + # in preparation for intersection + dom_with_set_dim_aligned = reorder_dims_by_name( dom, isl.dim_type.set, space_in_names, ) # Intersect domain with this map union_of_maps = union_of_maps.union( - map_from_set.intersect_domain(dom_with_all_inames)) + map_from_set.intersect_domain(dom_with_set_dim_aligned)) return union_of_maps From 3d321ae5d83592162fb5012b743ab32ac5506b18 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Tue, 4 Aug 2020 05:04:02 -0500 Subject: [PATCH 144/460] more precise docstring for reorder_dims_by_name() --- loopy/schedule/checker/utils.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/loopy/schedule/checker/utils.py b/loopy/schedule/checker/utils.py index 3a2c7c682..b3143a2ff 100644 --- a/loopy/schedule/checker/utils.py +++ b/loopy/schedule/checker/utils.py @@ -63,7 +63,8 @@ def map_names_match_check( def reorder_dims_by_name( isl_set, dim_type, desired_dims_ordered): - """Return an isl_set with the dimensions in the specified order. + """Return an isl_set with the dimensions of the specified dim_type + in the specified order. :arg isl_set: A :class:`islpy.Set` whose dimensions are to be reordered. From 0f1857921263d0e568ee59496a5f2480ed37e975 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Tue, 4 Aug 2020 12:35:26 -0500 Subject: [PATCH 145/460] make doctest output invalid to see if it fails ci test --- loopy/schedule/checker/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/loopy/schedule/checker/__init__.py b/loopy/schedule/checker/__init__.py index 09ffd6bde..446aeb377 100644 --- a/loopy/schedule/checker/__init__.py +++ b/loopy/schedule/checker/__init__.py @@ -80,7 +80,7 @@ def get_schedules_for_statement_pairs( ... [("insn_a", "insn_b")], ... 
) >>> print(*schedules[("insn_a", "insn_b")], sep="\n") - [pi, pj, pk] -> { [_lp_linchk_statement = 0, i, j, k] -> [_lp_linchk_l0 = i, _lp_linchk_l1 = 0] : 0 <= i < pi and 0 <= j < pj and 0 <= k < pk } + [pi, pj, pk] -> { [_lp_linchk_statement = 777, i, j, k] -> [_lp_linchk_l0 = i, _lp_linchk_l1 = 0] : 0 <= i < pi and 0 <= j < pj and 0 <= k < pk } [pi, pj, pk] -> { [_lp_linchk_statement = 1, i, j, k] -> [_lp_linchk_l0 = i, _lp_linchk_l1 = 1] : 0 <= i < pi and 0 <= j < pj and 0 <= k < pk } """ From 503247dde928408b87244e150dfb13629d088268 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Tue, 4 Aug 2020 13:39:37 -0500 Subject: [PATCH 146/460] undo intentionally incorrect doctest output, also add line break to placate flake8 --- loopy/schedule/checker/__init__.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/loopy/schedule/checker/__init__.py b/loopy/schedule/checker/__init__.py index 446aeb377..55d2876da 100644 --- a/loopy/schedule/checker/__init__.py +++ b/loopy/schedule/checker/__init__.py @@ -80,8 +80,12 @@ def get_schedules_for_statement_pairs( ... [("insn_a", "insn_b")], ... 
) >>> print(*schedules[("insn_a", "insn_b")], sep="\n") - [pi, pj, pk] -> { [_lp_linchk_statement = 777, i, j, k] -> [_lp_linchk_l0 = i, _lp_linchk_l1 = 0] : 0 <= i < pi and 0 <= j < pj and 0 <= k < pk } - [pi, pj, pk] -> { [_lp_linchk_statement = 1, i, j, k] -> [_lp_linchk_l0 = i, _lp_linchk_l1 = 1] : 0 <= i < pi and 0 <= j < pj and 0 <= k < pk } + [pi, pj, pk] -> { [_lp_linchk_statement = 0, i, j, k] -> \ + [_lp_linchk_l0 = i, _lp_linchk_l1 = 0] : \ + 0 <= i < pi and 0 <= j < pj and 0 <= k < pk } + [pi, pj, pk] -> { [_lp_linchk_statement = 1, i, j, k] -> \ + [_lp_linchk_l0 = i, _lp_linchk_l1 = 1] : \ + 0 <= i < pi and 0 <= j < pj and 0 <= k < pk } """ From 2504e3a5c0bd25b060448a6afde20202bc52e3b4 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Tue, 4 Aug 2020 13:58:54 -0500 Subject: [PATCH 147/460] try another approach to handling broken lines in expected doctest results --- loopy/schedule/checker/__init__.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/loopy/schedule/checker/__init__.py b/loopy/schedule/checker/__init__.py index 55d2876da..89395b198 100644 --- a/loopy/schedule/checker/__init__.py +++ b/loopy/schedule/checker/__init__.py @@ -81,11 +81,11 @@ def get_schedules_for_statement_pairs( ... 
) >>> print(*schedules[("insn_a", "insn_b")], sep="\n") [pi, pj, pk] -> { [_lp_linchk_statement = 0, i, j, k] -> \ - [_lp_linchk_l0 = i, _lp_linchk_l1 = 0] : \ - 0 <= i < pi and 0 <= j < pj and 0 <= k < pk } +[_lp_linchk_l0 = i, _lp_linchk_l1 = 0] : \ +0 <= i < pi and 0 <= j < pj and 0 <= k < pk } [pi, pj, pk] -> { [_lp_linchk_statement = 1, i, j, k] -> \ - [_lp_linchk_l0 = i, _lp_linchk_l1 = 1] : \ - 0 <= i < pi and 0 <= j < pj and 0 <= k < pk } +[_lp_linchk_l0 = i, _lp_linchk_l1 = 1] : \ +0 <= i < pi and 0 <= j < pj and 0 <= k < pk } """ From 0d7742396f83c25946852a3b8c25990bd9c0e66a Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Tue, 4 Aug 2020 14:19:01 -0500 Subject: [PATCH 148/460] workaround for dumb doctest that can't handle expected output split up across lines: add line breaks to print statement --- loopy/schedule/checker/__init__.py | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/loopy/schedule/checker/__init__.py b/loopy/schedule/checker/__init__.py index 89395b198..bb96ebbaa 100644 --- a/loopy/schedule/checker/__init__.py +++ b/loopy/schedule/checker/__init__.py @@ -79,13 +79,17 @@ def get_schedules_for_statement_pairs( ... knl.linearization, ... [("insn_a", "insn_b")], ... ) - >>> print(*schedules[("insn_a", "insn_b")], sep="\n") - [pi, pj, pk] -> { [_lp_linchk_statement = 0, i, j, k] -> \ -[_lp_linchk_l0 = i, _lp_linchk_l1 = 0] : \ -0 <= i < pi and 0 <= j < pj and 0 <= k < pk } - [pi, pj, pk] -> { [_lp_linchk_statement = 1, i, j, k] -> \ -[_lp_linchk_l0 = i, _lp_linchk_l1 = 1] : \ -0 <= i < pi and 0 <= j < pj and 0 <= k < pk } + >>> # Print maps + >>> print("\n".join( + ... str(m).replace("{ ", "{\n").replace(" :", "\n:") + ... for m in schedules[("insn_a", "insn_b")] + ... 
)) + [pi, pj, pk] -> { + [_lp_linchk_statement = 0, i, j, k] -> [_lp_linchk_l0 = i, _lp_linchk_l1 = 0] + : 0 <= i < pi and 0 <= j < pj and 0 <= k < pk } + [pi, pj, pk] -> { + [_lp_linchk_statement = 1, i, j, k] -> [_lp_linchk_l0 = i, _lp_linchk_l1 = 1] + : 0 <= i < pi and 0 <= j < pj and 0 <= k < pk } """ From 049d17b4ecc7c1aa39011aaeaa55956d9ccc1e4c Mon Sep 17 00:00:00 2001 From: James Stevens Date: Fri, 28 Aug 2020 04:19:45 +0200 Subject: [PATCH 149/460] Apply 1 suggestion(s) to 1 file(s) --- loopy/schedule/checker/schedule.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/loopy/schedule/checker/schedule.py b/loopy/schedule/checker/schedule.py index 1e6e30fb7..b088c40f1 100644 --- a/loopy/schedule/checker/schedule.py +++ b/loopy/schedule/checker/schedule.py @@ -114,7 +114,7 @@ def generate_pairwise_schedules( # (not technically necessary if no statement was added in the # previous section; gratuitous incrementing is counteracted # in the simplification step below) - next_insn_lex_tuple[-1] = next_insn_lex_tuple[-1]+1 + next_insn_lex_tuple[-1] += 1 # Upon entering a loop, add one lex dimension for the loop variable, # add second lex dim to enumerate sections of code within new loop From 15f085598653d904e62d4758818b21aff010299d Mon Sep 17 00:00:00 2001 From: James Stevens Date: Fri, 28 Aug 2020 04:20:20 +0200 Subject: [PATCH 150/460] Apply 1 suggestion(s) to 1 file(s) --- loopy/schedule/checker/schedule.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/loopy/schedule/checker/schedule.py b/loopy/schedule/checker/schedule.py index b088c40f1..bc249952a 100644 --- a/loopy/schedule/checker/schedule.py +++ b/loopy/schedule/checker/schedule.py @@ -44,7 +44,7 @@ LIN_CHECK_IDENTIFIER_PREFIX = "_lp_linchk_" LEX_VAR_PREFIX = "%sl" % (LIN_CHECK_IDENTIFIER_PREFIX) -STATEMENT_VAR_NAME = "%sstatement" % (LIN_CHECK_IDENTIFIER_PREFIX) +STATEMENT_VAR_NAME = "%sstmt" % (LIN_CHECK_IDENTIFIER_PREFIX) def generate_pairwise_schedules( From 
2f583e5468b99d6b643cb7aa9d84fe09a187e03a Mon Sep 17 00:00:00 2001 From: James Stevens Date: Fri, 28 Aug 2020 04:21:01 +0200 Subject: [PATCH 151/460] Apply 1 suggestion(s) to 1 file(s) --- loopy/schedule/checker/schedule.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/loopy/schedule/checker/schedule.py b/loopy/schedule/checker/schedule.py index bc249952a..0380bd4f0 100644 --- a/loopy/schedule/checker/schedule.py +++ b/loopy/schedule/checker/schedule.py @@ -30,8 +30,8 @@ .. data:: LEX_VAR_PREFIX - E.g., a prefix of "_lp_linchk_lex" might yield lexicographic dimension - variables "_lp_linchk_lex0", "_lp_linchk_lex1", "_lp_linchk_lex2". Cf. + E.g., a prefix of ``_lp_linchk_lex`` might yield lexicographic dimension + variables ``_lp_linchk_lex0``, ``_lp_linchk_lex1``, ``_lp_linchk_lex2``. Cf. :ref:`reserved-identifiers`. .. data:: STATEMENT_VAR_NAME From 05d57062a681bb6f0c85a91d4f88120567953005 Mon Sep 17 00:00:00 2001 From: James Stevens Date: Fri, 28 Aug 2020 04:21:22 +0200 Subject: [PATCH 152/460] Apply 1 suggestion(s) to 1 file(s) --- loopy/schedule/checker/__init__.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/loopy/schedule/checker/__init__.py b/loopy/schedule/checker/__init__.py index bb96ebbaa..99525a983 100644 --- a/loopy/schedule/checker/__init__.py +++ b/loopy/schedule/checker/__init__.py @@ -46,8 +46,7 @@ def get_schedules_for_statement_pairs( process. :arg insn_id_pairs: A list of two-tuples containing pairs of instruction - identifiers, each of which is unique within a - :class:`loopy.kernel.LoopKernel`. + identifiers. 
:returns: A dictionary mapping each two-tuple of instruction identifiers provided in `insn_id_pairs` to a corresponding two-tuple containing two From 8a882e008a68bd89779053458c2b543d5fb21fb2 Mon Sep 17 00:00:00 2001 From: James Stevens Date: Fri, 28 Aug 2020 04:21:37 +0200 Subject: [PATCH 153/460] Apply 1 suggestion(s) to 1 file(s) --- loopy/schedule/checker/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/loopy/schedule/checker/__init__.py b/loopy/schedule/checker/__init__.py index 99525a983..e41215897 100644 --- a/loopy/schedule/checker/__init__.py +++ b/loopy/schedule/checker/__init__.py @@ -45,7 +45,7 @@ def get_schedules_for_statement_pairs( kernel since this function may be used during the linearization process. - :arg insn_id_pairs: A list of two-tuples containing pairs of instruction + :arg insn_id_pairs: A list containing pairs of instruction identifiers. :returns: A dictionary mapping each two-tuple of instruction identifiers From 0985a6b2bbfb569ee4f1f2e4c5a3f9d403114efe Mon Sep 17 00:00:00 2001 From: James Stevens Date: Fri, 28 Aug 2020 04:21:59 +0200 Subject: [PATCH 154/460] Apply 1 suggestion(s) to 1 file(s) --- loopy/schedule/checker/schedule.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/loopy/schedule/checker/schedule.py b/loopy/schedule/checker/schedule.py index 0380bd4f0..a438ca1b6 100644 --- a/loopy/schedule/checker/schedule.py +++ b/loopy/schedule/checker/schedule.py @@ -137,7 +137,7 @@ def generate_pairwise_schedules( # (not technically necessary if no statement was added in the # previous section; gratuitous incrementing is counteracted # in the simplification step below) - next_insn_lex_tuple[-1] = next_insn_lex_tuple[-1]+1 + next_insn_lex_tuple[-1] += 1 elif isinstance(linearization_item, (RunInstruction, Barrier)): from loopy.schedule.checker.utils import ( From 88557eedf4e17331b7d7726eb1154479920b357e Mon Sep 17 00:00:00 2001 From: James Stevens Date: Fri, 28 Aug 2020 04:22:11 +0200 
Subject: [PATCH 155/460] Apply 1 suggestion(s) to 1 file(s) --- loopy/schedule/checker/schedule.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/loopy/schedule/checker/schedule.py b/loopy/schedule/checker/schedule.py index a438ca1b6..6d32f36b9 100644 --- a/loopy/schedule/checker/schedule.py +++ b/loopy/schedule/checker/schedule.py @@ -160,7 +160,7 @@ def generate_pairwise_schedules( # Only process listed insns, otherwise ignore if lp_insn_id in all_insn_ids: # Add item to stmt_instances - stmt_instances[lp_insn_id] = tuple(next_insn_lex_tuple[:]) + stmt_instances[lp_insn_id] = tuple(next_insn_lex_tuple) # Increment lex dim val enumerating items in current section of code next_insn_lex_tuple[-1] = next_insn_lex_tuple[-1] + 1 From 5f9d4295a91e51507726b357001598c1a53f2f0c Mon Sep 17 00:00:00 2001 From: James Stevens Date: Fri, 28 Aug 2020 04:22:17 +0200 Subject: [PATCH 156/460] Apply 1 suggestion(s) to 1 file(s) --- loopy/schedule/checker/schedule.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/loopy/schedule/checker/schedule.py b/loopy/schedule/checker/schedule.py index 6d32f36b9..dec9cf24a 100644 --- a/loopy/schedule/checker/schedule.py +++ b/loopy/schedule/checker/schedule.py @@ -163,7 +163,7 @@ def generate_pairwise_schedules( stmt_instances[lp_insn_id] = tuple(next_insn_lex_tuple) # Increment lex dim val enumerating items in current section of code - next_insn_lex_tuple[-1] = next_insn_lex_tuple[-1] + 1 + next_insn_lex_tuple[-1] += 1 else: from loopy.schedule import (CallKernel, ReturnFromKernel) From 0b1994113c394efcd2fc59f7f8b0ee6c9bc91962 Mon Sep 17 00:00:00 2001 From: James Stevens Date: Fri, 28 Aug 2020 04:23:00 +0200 Subject: [PATCH 157/460] Apply 1 suggestion(s) to 1 file(s) --- loopy/schedule/checker/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/loopy/schedule/checker/utils.py b/loopy/schedule/checker/utils.py index b3143a2ff..92b471f6d 100644 --- a/loopy/schedule/checker/utils.py +++ 
b/loopy/schedule/checker/utils.py @@ -37,7 +37,7 @@ def add_dims_to_isl_set(isl_set, dim_type, names, new_idx_start): return new_set -def map_names_match_check( +def check_that_map_names_match( obj_map, desired_names, dim_type, From a6ab09d4c2faa7aea81f15b16a54ed8153e93001 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Thu, 27 Aug 2020 21:32:42 -0500 Subject: [PATCH 158/460] change _lp_linchk_statement->_lp_linchk_stmt in doctest after renaming of module prefix --- loopy/schedule/checker/__init__.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/loopy/schedule/checker/__init__.py b/loopy/schedule/checker/__init__.py index e41215897..0935e22f0 100644 --- a/loopy/schedule/checker/__init__.py +++ b/loopy/schedule/checker/__init__.py @@ -84,10 +84,10 @@ def get_schedules_for_statement_pairs( ... for m in schedules[("insn_a", "insn_b")] ... )) [pi, pj, pk] -> { - [_lp_linchk_statement = 0, i, j, k] -> [_lp_linchk_l0 = i, _lp_linchk_l1 = 0] + [_lp_linchk_stmt = 0, i, j, k] -> [_lp_linchk_l0 = i, _lp_linchk_l1 = 0] : 0 <= i < pi and 0 <= j < pj and 0 <= k < pk } [pi, pj, pk] -> { - [_lp_linchk_statement = 1, i, j, k] -> [_lp_linchk_l0 = i, _lp_linchk_l1 = 1] + [_lp_linchk_stmt = 1, i, j, k] -> [_lp_linchk_l0 = i, _lp_linchk_l1 = 1] : 0 <= i < pi and 0 <= j < pj and 0 <= k < pk } """ From 852ba31431b87859fdf0c58e7077417a0608a0f5 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Thu, 27 Aug 2020 21:34:56 -0500 Subject: [PATCH 159/460] change map_names_match_check->check_that_map_names_match after renaming of function --- loopy/schedule/checker/utils.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/loopy/schedule/checker/utils.py b/loopy/schedule/checker/utils.py index 92b471f6d..548ef2db2 100644 --- a/loopy/schedule/checker/utils.py +++ b/loopy/schedule/checker/utils.py @@ -80,7 +80,7 @@ def reorder_dims_by_name( """ - map_names_match_check( + check_that_map_names_match( isl_set, desired_dims_ordered, dim_type, assert_subset=True, 
assert_permutation=False) @@ -109,7 +109,7 @@ def ensure_dim_names_match_and_align(obj_map, tgt_map): # first make sure names match for dt in [isl.dim_type.in_, isl.dim_type.out, isl.dim_type.param]: - map_names_match_check( + check_that_map_names_match( obj_map, tgt_map.get_var_names(dt), dt, assert_permutation=True) From 88f23ead8694360d35c6b877bbc3823d18fd763d Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Thu, 27 Aug 2020 21:42:42 -0500 Subject: [PATCH 160/460] rephrase docstring for linearization_items argument (the part about a partial list of linearization items) --- loopy/schedule/checker/__init__.py | 6 +++--- loopy/schedule/checker/schedule.py | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/loopy/schedule/checker/__init__.py b/loopy/schedule/checker/__init__.py index 0935e22f0..5572b03e8 100644 --- a/loopy/schedule/checker/__init__.py +++ b/loopy/schedule/checker/__init__.py @@ -41,9 +41,9 @@ def get_schedules_for_statement_pairs( :arg linearization_items: A list of :class:`loopy.schedule.ScheduleItem` (to be renamed to `loopy.schedule.LinearizationItem`) containing all linearization items for which pairwise schedules will be - created. This list may be a *partial* linearization for a - kernel since this function may be used during the linearization - process. + created. To allow usage of this routine during linearization, a + truncated (i.e. partial) linearization may be passed through this + argument. :arg insn_id_pairs: A list containing pairs of instruction identifiers. diff --git a/loopy/schedule/checker/schedule.py b/loopy/schedule/checker/schedule.py index dec9cf24a..4b1b4e07c 100644 --- a/loopy/schedule/checker/schedule.py +++ b/loopy/schedule/checker/schedule.py @@ -67,9 +67,9 @@ def generate_pairwise_schedules( :arg linearization_items: A list of :class:`loopy.schedule.ScheduleItem` (to be renamed to `loopy.schedule.LinearizationItem`) containing all linearization items for which pairwise schedules will be - created. 
This list may be a *partial* linearization for a - kernel since this function may be used during the linearization - process. + created. To allow usage of this routine during linearization, a + truncated (i.e. partial) linearization may be passed through this + argument. :arg insn_id_pairs: A list of two-tuples containing pairs of instruction identifiers, each of which is unique within a From d72e65375cae25e1a99252feabbcaf8096af8abc Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Thu, 27 Aug 2020 22:18:16 -0500 Subject: [PATCH 161/460] assert that all concurrent EnterLoop inames are tagged Vec or ILP, and don't warn if any such inames are found --- loopy/schedule/checker/__init__.py | 13 +++++++------ loopy/schedule/checker/schedule.py | 3 ++- 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/loopy/schedule/checker/__init__.py b/loopy/schedule/checker/__init__.py index 5572b03e8..3bfb3822a 100644 --- a/loopy/schedule/checker/__init__.py +++ b/loopy/schedule/checker/__init__.py @@ -113,12 +113,13 @@ def get_schedules_for_statement_pairs( conc_inames, _ = partition_inames_by_concurrency(knl) enterloop_inames = get_EnterLoop_inames(linearization_items) conc_loop_inames = conc_inames & enterloop_inames - if conc_loop_inames: - from warnings import warn - warn( - "get_schedule_for_statement_pair encountered EnterLoop for inames %s " - "with ConcurrentTag(s) in linearization for kernel %s. " - "Ignoring these loops." 
% (conc_loop_inames, knl.name)) + + # The only concurrent EnterLoop inames should be Vec and ILP + from loopy.kernel.data import (VectorizeTag, IlpBaseTag) + for conc_iname in conc_loop_inames: + assert any( + isinstance(tag, (VectorizeTag, IlpBaseTag)) + for tag in knl.iname_to_tags[conc_iname]) # }}} diff --git a/loopy/schedule/checker/schedule.py b/loopy/schedule/checker/schedule.py index 4b1b4e07c..15440c520 100644 --- a/loopy/schedule/checker/schedule.py +++ b/loopy/schedule/checker/schedule.py @@ -77,7 +77,8 @@ def generate_pairwise_schedules( :arg loops_to_ignore: A set of inames that will be ignored when determining the relative ordering of statements. This will typically - contain concurrent inames. + contain concurrent inames tagged with the ``vec`` or ``ilp`` array + access tags. :returns: A dictionary mapping each two-tuple of instruction identifiers provided in `insn_id_pairs` to a corresponding two-tuple containing two From 559f7781ef63128e26301ac8f138b9497d337daf Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Thu, 27 Aug 2020 22:28:10 -0500 Subject: [PATCH 162/460] simplify phrasing of insn_id_pairs arg description (to match phrasing in generate_pairwise_schedules()) --- loopy/schedule/checker/schedule.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/loopy/schedule/checker/schedule.py b/loopy/schedule/checker/schedule.py index 15440c520..aeac8bdfc 100644 --- a/loopy/schedule/checker/schedule.py +++ b/loopy/schedule/checker/schedule.py @@ -71,9 +71,7 @@ def generate_pairwise_schedules( truncated (i.e. partial) linearization may be passed through this argument. - :arg insn_id_pairs: A list of two-tuples containing pairs of instruction - identifiers, each of which is unique within a - :class:`loopy.kernel.LoopKernel`. + :arg insn_id_pairs: A list containing pairs of instruction identifiers. :arg loops_to_ignore: A set of inames that will be ignored when determining the relative ordering of statements. 
This will typically From b992340916ee2a46a4f16316c84959ea2a758cc5 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Thu, 27 Aug 2020 22:36:23 -0500 Subject: [PATCH 163/460] move function defs for _pad_tuple_with_zeros() and _simplify_lex_dims() outside of generate_pairwise_schedules() --- loopy/schedule/checker/schedule.py | 114 +++++++++++++++-------------- 1 file changed, 58 insertions(+), 56 deletions(-) diff --git a/loopy/schedule/checker/schedule.py b/loopy/schedule/checker/schedule.py index aeac8bdfc..bc71df5d8 100644 --- a/loopy/schedule/checker/schedule.py +++ b/loopy/schedule/checker/schedule.py @@ -47,6 +47,64 @@ STATEMENT_VAR_NAME = "%sstmt" % (LIN_CHECK_IDENTIFIER_PREFIX) +def _pad_tuple_with_zeros(tup, desired_length): + return tup[:] + tuple([0]*(desired_length-len(tup))) + + +def _simplify_lex_dims(tup0, tup1): + """Simplify a pair of lex tuples in order to reduce the complexity of + resulting maps. Remove lex tuple dimensions with matching integer values + since these do not provide information on relative ordering. Once a + dimension is found where both tuples have non-matching integer values, + remove any faster-updating lex dimensions since they are not necessary + to specify a relative ordering. + """ + + new_tup0 = [] + new_tup1 = [] + + # Loop over dims from slowest updating to fastest + for d0, d1 in zip(tup0, tup1): + if isinstance(d0, int) and isinstance(d1, int): + + # Both vals are ints for this dim + if d0 == d1: + # Do not keep this dim + continue + elif d0 > d1: + # These ints inform us about the relative ordering of + # two statements. While their values may be larger than 1 in + # the lexicographic ordering describing a larger set of + # statements, in a pairwise schedule, only ints 0 and 1 are + # necessary to specify relative order. To keep the pairwise + # schedules as simple and comprehensible as possible, use only + # integers 0 and 1 to specify this relative ordering. 
+ # (doesn't take much extra time since we are already going + # through these to remove unnecessary lex tuple dims) + new_tup0.append(1) + new_tup1.append(0) + + # No further dims needed to fully specify ordering + break + else: # d1 > d0 + new_tup0.append(0) + new_tup1.append(1) + + # No further dims needed to fully specify ordering + break + else: + # Keep this dim without modifying + new_tup0.append(d0) + new_tup1.append(d1) + + if len(new_tup0) == 0: + # Statements map to the exact same point(s) in the lex ordering, + # which is okay, but to represent this, our lex tuple cannot be empty. + return (0, ), (0, ) + else: + return tuple(new_tup0), tuple(new_tup1) + + def generate_pairwise_schedules( knl, linearization_items, @@ -181,62 +239,6 @@ def generate_pairwise_schedules( add_dims_to_isl_set, ) - def _pad_tuple_with_zeros(tup, desired_length): - return tup[:] + tuple([0]*(desired_length-len(tup))) - - def _simplify_lex_dims(tup0, tup1): - """Simplify a pair of lex tuples in order to reduce the complexity of - resulting maps. Remove lex tuple dimensions with matching integer values - since these do not provide information on relative ordering. Once a - dimension is found where both tuples have non-matching integer values, - remove any faster-updating lex dimensions since they are not necessary - to specify a relative ordering. - """ - - new_tup0 = [] - new_tup1 = [] - - # Loop over dims from slowest updating to fastest - for d0, d1 in zip(tup0, tup1): - if isinstance(d0, int) and isinstance(d1, int): - - # Both vals are ints for this dim - if d0 == d1: - # Do not keep this dim - continue - elif d0 > d1: - # These ints inform us about the relative ordering of - # two statements. While their values may be larger than 1 in - # the lexicographic ordering describing a larger set of - # statements, in a pairwise schedule, only ints 0 and 1 are - # necessary to specify relative order. 
To keep the pairwise - # schedules as simple and comprehensible as possible, use only - # integers 0 and 1 to specify this relative ordering. - # (doesn't take much extra time since we are already going - # through these to remove unnecessary lex tuple dims) - new_tup0.append(1) - new_tup1.append(0) - - # No further dims needed to fully specify ordering - break - else: # d1 > d0 - new_tup0.append(0) - new_tup1.append(1) - - # No further dims needed to fully specify ordering - break - else: - # Keep this dim without modifying - new_tup0.append(d0) - new_tup1.append(d1) - - if len(new_tup0) == 0: - # Statements map to the exact same point(s) in the lex ordering, - # which is okay, but to represent this, our lex tuple cannot be empty. - return (0, ), (0, ) - else: - return tuple(new_tup0), tuple(new_tup1) - def _get_map_for_stmt_inst(insn_id, lex_points, int_sid, out_names_sched): # Get inames domain for statement instance (a BasicSet) From 0921a33b5ec57fe2779407b5b02ea2904f1eca54 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Thu, 27 Aug 2020 22:46:11 -0500 Subject: [PATCH 164/460] remove faulthandler stuff --- test/test_linearization_checker.py | 7 ------- 1 file changed, 7 deletions(-) diff --git a/test/test_linearization_checker.py b/test/test_linearization_checker.py index 6d0fd3abf..ade47f0c6 100644 --- a/test/test_linearization_checker.py +++ b/test/test_linearization_checker.py @@ -42,13 +42,6 @@ logger = logging.getLogger(__name__) -try: - import faulthandler -except ImportError: - pass -else: - faulthandler.enable() - def test_lexschedule_creation(): import islpy as isl From 237e7d69e000f70ab3321480d244bcd20d164930 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Thu, 27 Aug 2020 22:46:55 -0500 Subject: [PATCH 165/460] remove redundant lang_version --- test/test_linearization_checker.py | 1 - 1 file changed, 1 deletion(-) diff --git a/test/test_linearization_checker.py b/test/test_linearization_checker.py index ade47f0c6..56e1c0722 100644 --- 
a/test/test_linearization_checker.py +++ b/test/test_linearization_checker.py @@ -76,7 +76,6 @@ def test_lexschedule_creation(): """, name="example", assumptions="pi,pj,pk,pt >= 1", - lang_version=(2018, 2) ) knl = lp.add_and_infer_dtypes( knl, From 74f9ee40732f0bed15d53e13cb1c471c589843bd Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Thu, 27 Aug 2020 22:59:11 -0500 Subject: [PATCH 166/460] test_lexschedule_creation(), make kernel instruction/loop order deterministic and remove machinery for handling multiple potential orderings --- test/test_linearization_checker.py | 230 ++++++++++++----------------- 1 file changed, 92 insertions(+), 138 deletions(-) diff --git a/test/test_linearization_checker.py b/test/test_linearization_checker.py index 56e1c0722..3c927a9ce 100644 --- a/test/test_linearization_checker.py +++ b/test/test_linearization_checker.py @@ -53,6 +53,8 @@ def test_lexschedule_creation(): ) # example kernel + # insn_c depends on insn_b only to create deterministic order + # insn_d depends on insn_c only to create deterministic order knl = lp.make_kernel( [ "{[i]: 0<=i { [%s=0, i, k] -> [%s] : 0 <= i < pi and 0 <= k < pk }" - % ( - STATEMENT_VAR_NAME, - _lex_space_string([a_lex_idx, ]), - ) - ) - sched_map_before_expected = ensure_dim_names_match_and_align( - sched_map_before_expected, sched_map_before) - - sched_map_after_expected = isl.Map( - "[pt] -> { [%s=1, t] -> [%s] : 0 <= t < pt }" - % ( - STATEMENT_VAR_NAME, - _lex_space_string([d_lex_idx, ]), - ) + sched_map_before_expected = isl.Map( + "[pi, pk] -> { [%s=0, i, k] -> [%s] : 0 <= i < pi and 0 <= k < pk }" + % ( + STATEMENT_VAR_NAME, + _lex_space_string([0, ]), ) - sched_map_after_expected = ensure_dim_names_match_and_align( - sched_map_after_expected, sched_map_after) + ) + sched_map_before_expected = ensure_dim_names_match_and_align( + sched_map_before_expected, sched_map_before) - assert sched_map_before == sched_map_before_expected - assert sched_map_after == sched_map_after_expected + 
sched_map_after_expected = isl.Map( + "[pt] -> { [%s=1, t] -> [%s] : 0 <= t < pt }" + % ( + STATEMENT_VAR_NAME, + _lex_space_string([1, ]), + ) + ) + sched_map_after_expected = ensure_dim_names_match_and_align( + sched_map_after_expected, sched_map_after) - if linearized_insn_ord.index("insn_a") < linearized_insn_ord.index("insn_d"): - # insn_a was linearized first, check schedule accordingly - perform_insn_ad_checks_with(0, 1) - else: - # insn_d was linearized first, check schedule accordingly - perform_insn_ad_checks_with(1, 0) + assert sched_map_before == sched_map_before_expected + assert sched_map_after == sched_map_after_expected # ------------------------------------------------------------------------------ # Relationship between insn_b and insn_c --------------------------------------- - # insn_b and insn_c could have been linearized in either order - def perform_insn_bc_checks_with(b_lex_idx, c_lex_idx): - # Get two maps - sched_map_before, sched_map_after = sched_maps[("insn_b", "insn_c")] + # Get two maps + sched_map_before, sched_map_after = sched_maps[("insn_b", "insn_c")] - # Create expected maps, align, compare + # Create expected maps, align, compare - sched_map_before_expected = isl.Map( - "[pi, pj] -> { [%s=0, i, j] -> [%s] : 0 <= i < pi and 0 <= j < pj }" - % ( - STATEMENT_VAR_NAME, - _lex_space_string(["i", "j", b_lex_idx]), - ) - ) - sched_map_before_expected = ensure_dim_names_match_and_align( - sched_map_before_expected, sched_map_before) - - sched_map_after_expected = isl.Map( - "[pi, pj] -> { [%s=1, i, j] -> [%s] : 0 <= i < pi and 0 <= j < pj }" - % ( - STATEMENT_VAR_NAME, - _lex_space_string(["i", "j", c_lex_idx]), - ) + sched_map_before_expected = isl.Map( + "[pi, pj] -> { [%s=0, i, j] -> [%s] : 0 <= i < pi and 0 <= j < pj }" + % ( + STATEMENT_VAR_NAME, + _lex_space_string(["i", "j", 0]), ) - sched_map_after_expected = ensure_dim_names_match_and_align( - sched_map_after_expected, sched_map_after) + ) + sched_map_before_expected = 
ensure_dim_names_match_and_align( + sched_map_before_expected, sched_map_before) - assert sched_map_before == sched_map_before_expected - assert sched_map_after == sched_map_after_expected + sched_map_after_expected = isl.Map( + "[pi, pj] -> { [%s=1, i, j] -> [%s] : 0 <= i < pi and 0 <= j < pj }" + % ( + STATEMENT_VAR_NAME, + _lex_space_string(["i", "j", 1]), + ) + ) + sched_map_after_expected = ensure_dim_names_match_and_align( + sched_map_after_expected, sched_map_after) - if linearized_insn_ord.index("insn_b") < linearized_insn_ord.index("insn_c"): - # insn_b was linearized first, check schedule accordingly - perform_insn_bc_checks_with(0, 1) - else: - # insn_c was linearized first, check schedule accordingly - perform_insn_bc_checks_with(1, 0) + assert sched_map_before == sched_map_before_expected + assert sched_map_after == sched_map_after_expected # ------------------------------------------------------------------------------ # Relationship between insn_b and insn_d --------------------------------------- - # insn_b and insn_d could have been linearized in either order - # (i loop could be before or after t loop) - def perform_insn_bd_checks_with(b_lex_idx, d_lex_idx): - # Get two maps - sched_map_before, sched_map_after = sched_maps[("insn_b", "insn_d")] + # Get two maps + sched_map_before, sched_map_after = sched_maps[("insn_b", "insn_d")] - # Create expected maps, align, compare + # Create expected maps, align, compare - sched_map_before_expected = isl.Map( - "[pi, pj] -> { [%s=0, i, j] -> [%s] : 0 <= i < pi and 0 <= j < pj }" - % ( - STATEMENT_VAR_NAME, - _lex_space_string([b_lex_idx, ]), - ) - ) - sched_map_before_expected = ensure_dim_names_match_and_align( - sched_map_before_expected, sched_map_before) - - sched_map_after_expected = isl.Map( - "[pt] -> { [%s=1, t] -> [%s] : 0 <= t < pt }" - % ( - STATEMENT_VAR_NAME, - _lex_space_string([d_lex_idx, ]), - ) + sched_map_before_expected = isl.Map( + "[pi, pj] -> { [%s=0, i, j] -> [%s] : 0 <= i < pi and 0 
<= j < pj }" + % ( + STATEMENT_VAR_NAME, + _lex_space_string([0, ]), ) - sched_map_after_expected = ensure_dim_names_match_and_align( - sched_map_after_expected, sched_map_after) + ) + sched_map_before_expected = ensure_dim_names_match_and_align( + sched_map_before_expected, sched_map_before) - assert sched_map_before == sched_map_before_expected - assert sched_map_after == sched_map_after_expected + sched_map_after_expected = isl.Map( + "[pt] -> { [%s=1, t] -> [%s] : 0 <= t < pt }" + % ( + STATEMENT_VAR_NAME, + _lex_space_string([1, ]), + ) + ) + sched_map_after_expected = ensure_dim_names_match_and_align( + sched_map_after_expected, sched_map_after) - if linearized_insn_ord.index("insn_b") < linearized_insn_ord.index("insn_d"): - # insn_b was linearized first, check schedule accordingly - perform_insn_bd_checks_with(0, 1) - else: - # insn_d was linearized first, check schedule accordingly - perform_insn_bd_checks_with(1, 0) + assert sched_map_before == sched_map_before_expected + assert sched_map_after == sched_map_after_expected # ------------------------------------------------------------------------------ # Relationship between insn_c and insn_d --------------------------------------- - # insn_c and insn_d could have been linearized in either order - # (i loop could be before or after t loop) - def perform_insn_cd_checks_with(c_lex_idx, d_lex_idx): - # Get two maps - sched_map_before, sched_map_after = sched_maps[("insn_c", "insn_d")] + # Get two maps + sched_map_before, sched_map_after = sched_maps[("insn_c", "insn_d")] - # Create expected maps, align, compare + # Create expected maps, align, compare - sched_map_before_expected = isl.Map( - "[pi, pj] -> { [%s=0, i, j] -> [%s] : 0 <= i < pi and 0 <= j < pj }" - % ( - STATEMENT_VAR_NAME, - _lex_space_string([c_lex_idx, ]), - ) - ) - sched_map_before_expected = ensure_dim_names_match_and_align( - sched_map_before_expected, sched_map_before) - - sched_map_after_expected = isl.Map( - "[pt] -> { [%s=1, t] -> [%s] 
: 0 <= t < pt }" - % ( - STATEMENT_VAR_NAME, - _lex_space_string([d_lex_idx, ]), - ) + sched_map_before_expected = isl.Map( + "[pi, pj] -> { [%s=0, i, j] -> [%s] : 0 <= i < pi and 0 <= j < pj }" + % ( + STATEMENT_VAR_NAME, + _lex_space_string([0, ]), ) - sched_map_after_expected = ensure_dim_names_match_and_align( - sched_map_after_expected, sched_map_after) + ) + sched_map_before_expected = ensure_dim_names_match_and_align( + sched_map_before_expected, sched_map_before) - assert sched_map_before == sched_map_before_expected - assert sched_map_after == sched_map_after_expected + sched_map_after_expected = isl.Map( + "[pt] -> { [%s=1, t] -> [%s] : 0 <= t < pt }" + % ( + STATEMENT_VAR_NAME, + _lex_space_string([1, ]), + ) + ) + sched_map_after_expected = ensure_dim_names_match_and_align( + sched_map_after_expected, sched_map_after) - if linearized_insn_ord.index("insn_c") < linearized_insn_ord.index("insn_d"): - # insn_c was linearized first, check schedule accordingly - perform_insn_cd_checks_with(0, 1) - else: - # insn_d was linearized first, check schedule accordingly - perform_insn_cd_checks_with(1, 0) + assert sched_map_before == sched_map_before_expected + assert sched_map_after == sched_map_after_expected if __name__ == "__main__": From 7d34e958233ffc742a147e5e47069b17f0a6e758 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Wed, 2 Sep 2020 08:42:16 -0500 Subject: [PATCH 167/460] replace call to check_that_map_names_match() with equivalent assertion in reorder_dims_by_name() --- loopy/schedule/checker/utils.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/loopy/schedule/checker/utils.py b/loopy/schedule/checker/utils.py index 548ef2db2..36851de44 100644 --- a/loopy/schedule/checker/utils.py +++ b/loopy/schedule/checker/utils.py @@ -80,10 +80,7 @@ def reorder_dims_by_name( """ - check_that_map_names_match( - isl_set, desired_dims_ordered, dim_type, - assert_subset=True, assert_permutation=False) - + assert 
set(isl_set.get_var_names(dim_type)).issubset(desired_dims_ordered) assert dim_type != isl.dim_type.param other_dim_type = isl.dim_type.param From 6f6d708fb291d12a69cc5fb9362a5d2a70ceeb46 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Wed, 2 Sep 2020 08:59:49 -0500 Subject: [PATCH 168/460] make stronger assertion in reorder_dims_by_name (just assert that sets match) --- loopy/schedule/checker/utils.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/loopy/schedule/checker/utils.py b/loopy/schedule/checker/utils.py index 36851de44..0953454aa 100644 --- a/loopy/schedule/checker/utils.py +++ b/loopy/schedule/checker/utils.py @@ -80,15 +80,14 @@ def reorder_dims_by_name( """ - assert set(isl_set.get_var_names(dim_type)).issubset(desired_dims_ordered) assert dim_type != isl.dim_type.param + assert set(isl_set.get_var_names(dim_type)) == set(desired_dims_ordered) other_dim_type = isl.dim_type.param other_dim_len = len(isl_set.get_var_names(other_dim_type)) new_set = isl_set.copy() for desired_idx, name in enumerate(desired_dims_ordered): - assert name in new_set.get_var_names(dim_type) current_idx = new_set.find_dim_by_name(dim_type, name) if current_idx != desired_idx: From ee4faf0f82a867f7b76586b5baefe5f234845bd0 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Wed, 2 Sep 2020 09:12:00 -0500 Subject: [PATCH 169/460] remove check_that_map_names_match(), replace function call with assertion --- loopy/schedule/checker/utils.py | 35 ++++----------------------------- 1 file changed, 4 insertions(+), 31 deletions(-) diff --git a/loopy/schedule/checker/utils.py b/loopy/schedule/checker/utils.py index 0953454aa..8e2a82a01 100644 --- a/loopy/schedule/checker/utils.py +++ b/loopy/schedule/checker/utils.py @@ -37,30 +37,6 @@ def add_dims_to_isl_set(isl_set, dim_type, names, new_idx_start): return new_set -def check_that_map_names_match( - obj_map, - desired_names, - dim_type, - assert_subset=True, - assert_permutation=True, - ): - """Raise an error if names of the 
specified map dimension do not match - the desired names - """ - - obj_map_names = obj_map.space.get_var_names(dim_type) - if assert_permutation: - if not set(obj_map_names) == set(desired_names): - raise ValueError( - "Set of map names %s for dim %s does not match target set %s" - % (obj_map_names, dim_type, desired_names)) - elif assert_subset: - if not set(obj_map_names).issubset(desired_names): - raise ValueError( - "Map names %s for dim %s are not a subset of target names %s" - % (obj_map_names, dim_type, desired_names)) - - def reorder_dims_by_name( isl_set, dim_type, desired_dims_ordered): """Return an isl_set with the dimensions of the specified dim_type @@ -104,14 +80,11 @@ def reorder_dims_by_name( def ensure_dim_names_match_and_align(obj_map, tgt_map): # first make sure names match - for dt in [isl.dim_type.in_, isl.dim_type.out, isl.dim_type.param]: - check_that_map_names_match( - obj_map, tgt_map.get_var_names(dt), dt, - assert_permutation=True) - - aligned_obj_map = isl.align_spaces(obj_map, tgt_map) + assert all( + set(obj_map.get_var_names(dt)) == set(tgt_map.get_var_names(dt)) + for dt in [isl.dim_type.in_, isl.dim_type.out, isl.dim_type.param]) - return aligned_obj_map + return isl.align_spaces(obj_map, tgt_map) def sorted_union_of_names_in_isl_sets( From b8edba90e6ec31df28dc62db1cc79aedd60237c8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andreas=20Kl=C3=B6ckner?= Date: Fri, 4 Sep 2020 06:41:00 +0200 Subject: [PATCH 170/460] Apply 1 suggestion(s) to 1 file(s) --- loopy/schedule/checker/__init__.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/loopy/schedule/checker/__init__.py b/loopy/schedule/checker/__init__.py index 3bfb3822a..f9e9933c6 100644 --- a/loopy/schedule/checker/__init__.py +++ b/loopy/schedule/checker/__init__.py @@ -117,6 +117,8 @@ def get_schedules_for_statement_pairs( # The only concurrent EnterLoop inames should be Vec and ILP from loopy.kernel.data import (VectorizeTag, IlpBaseTag) for conc_iname in conc_loop_inames: + # 
Assert that there exists an ilp or vectorize tag (out of the + # potentially multiple other tags on this concurrent iname). assert any( isinstance(tag, (VectorizeTag, IlpBaseTag)) for tag in knl.iname_to_tags[conc_iname]) From 9ab0a22d1232f8dabeb0ae7bb3b2e880f808c225 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Sun, 27 Sep 2020 21:24:01 -0500 Subject: [PATCH 171/460] rename get_lex_order_constraint->get_lex_order_set; lots of documenation/naming/comment improvements for clarity --- .../checker/lexicographic_order_map.py | 168 ++++++++++-------- loopy/schedule/checker/schedule.py | 12 +- loopy/schedule/checker/utils.py | 21 +-- 3 files changed, 109 insertions(+), 92 deletions(-) diff --git a/loopy/schedule/checker/lexicographic_order_map.py b/loopy/schedule/checker/lexicographic_order_map.py index 0966cba99..d9066030f 100644 --- a/loopy/schedule/checker/lexicographic_order_map.py +++ b/loopy/schedule/checker/lexicographic_order_map.py @@ -25,17 +25,19 @@ def get_statement_ordering_map( - sched_map_before, sched_map_after, lex_map, before_marker="'"): - """Return a mapping that maps each statement instance to - all statement instances occuring later. + sched_before, sched_after, lex_map, before_marker="'"): + """Return a statement ordering represented as a map from each statement + instance to all statement instances occurring later. - :arg sched_map_before: An :class:`islpy.Map` representing instruction - instance order for the dependee as a mapping from each statement - instance to a point in the lexicographic ordering. + :arg sched_before: An :class:`islpy.Map` representing a schedule + as a mapping from statement instances (for one particular statement) + to lexicographic time. The statement represented will typically + be the dependee in a dependency relationship. - :arg sched_map_after: An :class:`islpy.Map` representing instruction - instance order for the depender as a mapping from each statement - instance to a point in the lexicographic ordering. 
+ :arg sched_after: An :class:`islpy.Map` representing a schedule + as a mapping from statement instances (for one particular statement) + to lexicographic time. The statement represented will typically + be the depender in a dependency relationship. :arg lex_map: An :class:`islpy.Map` representing a lexicographic ordering as a mapping from each point in lexicographic time @@ -45,17 +47,23 @@ def get_statement_ordering_map( i0' < i0 or (i0' = i0 and i1' < i1) or (i0' = i0 and i1' = i1 and i2' < i2) ...} - :returns: An :class:`islpy.Map` representing the lex schedule as + :arg before_marker: A :class:`str` to be appended to the names of the + map dimensions representing the 'before' statement in the + 'happens before' relationship. + + :returns: An :class:`islpy.Map` representing the statement odering as a mapping from each statement instance to all statement instances - occuring later. I.e., we compose relations B, L, and A as - B ∘ L ∘ A^-1, where B is sched_map_before, A is sched_map_after, - and L is the lexicographic ordering map. + occurring later. I.e., we compose relations B, L, and A as + B ∘ L ∘ A^-1, where B is `sched_before`, A is `sched_after`, + and L is `lex_map`. """ - sio = sched_map_before.apply_range( - lex_map).apply_range(sched_map_after.reverse()) - # append marker to in names + # Perform the composition of relations + sio = sched_before.apply_range( + lex_map).apply_range(sched_after.reverse()) + + # Append marker to in_ dims from loopy.schedule.checker.utils import ( append_marker_to_isl_map_var_names, ) @@ -63,30 +71,38 @@ def get_statement_ordering_map( sio, isl.dim_type.in_, before_marker) -def get_lex_order_constraint(before_names, after_names, islvars=None): - """Return a constraint represented as an :class:`islpy.Set` - defining a 'happens before' relationship in a lexicographic - ordering. 
- - :arg before_names: A list of :class:`str` variable names representing - the lexicographic space dimensions for a point in lexicographic - time that occurs before. (see example below) - - :arg after_names: A list of :class:`str` variable names representing - the lexicographic space dimensions for a point in lexicographic - time that occurs after. (see example below) - - :arg islvars: A dictionary from variable names to :class:`islpy.PwAff` - instances that represent each of the variables - (islvars may be produced by `islpy.make_zero_and_vars`). The key - '0' is also include and represents a :class:`islpy.PwAff` zero constant. - This dictionary defines the space to be used for the set. If no - value is passed, the dictionary will be made using ``before_names`` - and ``after_names``. - - :returns: An :class:`islpy.Set` representing a constraint that enforces a - lexicographic ordering. E.g., if ``before_names = [i0', i1', i2']`` and - ``after_names = [i0, i1, i2]``, return the set:: +def get_lex_order_set(before_names, after_names, islvars=None): + """Return an :class:`islpy.Set` representing a lexicographic ordering + with the number of dimensions provided in `before_names` + (equal to the number of dimensions in `after_names`). + + :arg before_names: A list of :class:`str` variable names to be used + to describe lexicographic space dimensions for a point in a lexicographic + ordering that occurs before another point, which will be represented using + `after_names`. (see example below) + + :arg after_names: A list of :class:`str` variable names to be used + to describe lexicographic space dimensions for a point in a lexicographic + ordering that occurs after another point, which will be represented using + `before_names`. (see example below) + + :arg islvars: A dictionary mapping variable names in `before_names` and + `after_names` to :class:`islpy.PwAff` instances that represent each + of the variables (islvars may be produced by `islpy.make_zero_and_vars`). 
+ The key '0' is also include and represents a :class:`islpy.PwAff` zero + constant. This dictionary defines the space to be used for the set. If no + value is passed, the dictionary will be made using `before_names` + and `after_names`. + + :returns: An :class:`islpy.Set` representing a big-endian lexicographic ordering + with the number of dimensions provided in `before_names`. The set + has one dimension for each name in *both* `before_names` and + `after_names`, and contains all points which meet a 'happens before' + constraint defining the lexicographic ordering. E.g., if + `before_names = [i0', i1', i2']` and `after_names = [i0, i1, i2]`, + return the set containing all points in a 3-dimensional, big-endian + lexicographic ordering such that point + `[i0', i1', i2']` happens before `[i0, i1, i2]`. I.e., return:: {[i0', i1', i2', i0, i1, i2] : i0' < i0 or (i0' = i0 and i1' < i1) @@ -98,33 +114,31 @@ def get_lex_order_constraint(before_names, after_names, islvars=None): if islvars is None: islvars = isl.make_zero_and_vars(before_names+after_names, []) - # Initialize constraint with i0' < i0 - lex_order_constraint = islvars[before_names[0]].lt_set(islvars[after_names[0]]) + # Initialize set with constraint i0' < i0 + lex_order_set = islvars[before_names[0]].lt_set(islvars[after_names[0]]) - # Initialize conjunction constraint with True. - # For each dim d, starting with d=1, this conjunction will have d equalities, - # e.g., (i0' = i0 and i1' = i1 and ... i(d-1)' = i(d-1)) - equality_constraint_conj = islvars[0].eq_set(islvars[0]) + # For each dim d, starting with d=1, equality_conj_set will be constrained + # by d equalities, e.g., (i0' = i0 and i1' = i1 and ... i(d-1)' = i(d-1)). 
+ equality_conj_set = islvars[0].eq_set(islvars[0]) # initialize to 'true' for i in range(1, len(before_names)): - # Add the next equality constraint to equality_constraint_conj - equality_constraint_conj = equality_constraint_conj & \ + # Add the next equality constraint to equality_conj_set + equality_conj_set = equality_conj_set & \ islvars[before_names[i-1]].eq_set(islvars[after_names[i-1]]) - # Create a conjunction constraint by combining a less-than - # constraint for this dim, e.g., (i1' < i1), with the current - # equality constraint conjunction. - # For each dim d, starting with d=1, this conjunction will have d equalities, - # and one inequality, - # e.g., (i0' = i0 and i1' = i1 and ... i(d-1)' = i(d-1) and id' < id) - full_conj_constraint = islvars[before_names[i]].lt_set( - islvars[after_names[i]]) & equality_constraint_conj + # Create a set constrained by adding a less-than constraint for this dim, + # e.g., (i1' < i1), to the current equality conjunction set. + # For each dim d, starting with d=1, this full conjunction will have + # d equalities and one inequality, e.g., + # (i0' = i0 and i1' = i1 and ... i(d-1)' = i(d-1) and id' < id) + full_conj_set = islvars[before_names[i]].lt_set( + islvars[after_names[i]]) & equality_conj_set - # Union this new constraint with the current lex_order_constraint - lex_order_constraint = lex_order_constraint | full_conj_constraint + # Union this new constraint with the current lex_order_set + lex_order_set = lex_order_set | full_conj_set - return lex_order_constraint + return lex_order_set def create_lex_order_map( @@ -132,26 +146,28 @@ def create_lex_order_map( before_names=None, after_names=None, ): - """Return a mapping that maps each point in a lexicographic - ordering to every point that occurs later in lexicographic - time. + """Return a map from each point in a lexicographic ordering to every + point that occurs later in the lexicographic ordering. 
:arg n_dims: An :class:`int` representing the number of dimensions - in the lexicographic ordering. + in the lexicographic ordering. If not provided, `n_dims` will be + set to length of `after_names`. - :arg before_names: A list of :class:`str` variable names representing - the lexicographic space dimensions for a point in lexicographic - time that occurs before. (see example below) + :arg before_names: A list of :class:`str` variable names to be used + to describe lexicographic space dimensions for a point in a lexicographic + ordering that occurs before another point, which will be represented using + `after_names`. (see example below) - :arg after_names: A list of :class:`str` variable names representing - the lexicographic space dimensions for a point in lexicographic - time that occurs after. (see example below) + :arg after_names: A list of :class:`str` variable names to be used + to describe lexicographic space dimensions for a point in a lexicographic + ordering that occurs after another point, which will be represented using + `before_names`. (see example below) :returns: An :class:`islpy.Map` representing a lexicographic ordering as a mapping from each point in lexicographic time to every point that occurs later in lexicographic time. - E.g., if ``before_names = [i0', i1', i2']`` and - ``after_names = [i0, i1, i2]``, return the map:: + E.g., if `before_names = [i0', i1', i2']` and + `after_names = [i0, i1, i2]`, return the map:: {[i0', i1', i2'] -> [i0, i1, i2] : i0' < i0 or (i0' = i0 and i1' < i1) @@ -172,11 +188,11 @@ def create_lex_order_map( assert len(before_names) == len(after_names) == n_dims dim_type = isl.dim_type - lex_order_constraint = get_lex_order_constraint(before_names, after_names) + # First, get a set representing the lexicographic ordering. + lex_order_set = get_lex_order_set(before_names, after_names) - lex_map = isl.Map.from_domain(lex_order_constraint) - lex_map = lex_map.move_dims( + # Now convert that set to a map. 
+ lex_map = isl.Map.from_domain(lex_order_set) + return lex_map.move_dims( dim_type.out, 0, dim_type.in_, len(before_names), len(after_names)) - - return lex_map diff --git a/loopy/schedule/checker/schedule.py b/loopy/schedule/checker/schedule.py index 97764a5e2..a947da3ac 100644 --- a/loopy/schedule/checker/schedule.py +++ b/loopy/schedule/checker/schedule.py @@ -317,17 +317,17 @@ def _get_map_for_stmt_inst(insn_id, lex_points, int_sid, out_names_sched): def get_lex_order_map_for_sched_space(schedule): """Return an :class:`islpy.BasicMap` that maps each point in a - lexicographic ordering to every point that is - lexocigraphically greater. + lexicographic ordering to every point that occurs later. :arg schedule: A :class:`islpy.Map` representing the ordering of statement instances as a mapping from statement instances to lexicographic time. - :returns: An :class:`islpy.BasicMap` that maps each point in a - lexicographic ordering to every point that is - lexocigraphically greater with the dimension number and names - matching the output dimension of `schedule`. + :returns: An :class:`islpy.BasicMap` representing a lexicographic + ordering as a mapping from each point in lexicographic time + to every point that occurs later in lexicographic time, with + the dimension count and names matching the output dimension + of `schedule`. """ diff --git a/loopy/schedule/checker/utils.py b/loopy/schedule/checker/utils.py index 959c2116d..db1d861c8 100644 --- a/loopy/schedule/checker/utils.py +++ b/loopy/schedule/checker/utils.py @@ -88,16 +88,19 @@ def ensure_dim_names_match_and_align(obj_map, tgt_map): def append_marker_to_isl_map_var_names(old_isl_map, dim_type, marker="'"): - """Return an isl_map with marker appended to - dim_type dimension names. + """Return an :class:`islpy.Map` with a marker appended to the specified + dimension names. - :arg old_isl_map: A :class:`islpy.Map`. + :arg old_isl_map: An :class:`islpy.Map`. 
- :arg dim_type: A :class:`islpy.dim_type`, i.e., an :class:`int`, + :arg dim_type: An :class:`islpy.dim_type`, i.e., an :class:`int`, specifying the dimension to be marked. - :returns: A :class:`islpy.Map` matching `old_isl_map` with - apostrophes appended to dim_type dimension names. + :arg marker: A :class:`str` to be appended to the specified dimension + names. If not provided, `marker` defaults to an apostrophe. + + :returns: An :class:`islpy.Map` matching `old_isl_map` with + `marker` appended to the `dim_type` dimension names. """ @@ -109,10 +112,8 @@ def append_marker_to_isl_map_var_names(old_isl_map, dim_type, marker="'"): def append_marker_to_strings(strings, marker="'"): - if not isinstance(strings, list): - raise ValueError("append_marker_to_strings did not receive a list") - else: - return [s+marker for s in strings] + assert isinstance(strings, list) + return [s+marker for s in strings] def sorted_union_of_names_in_isl_sets( From da35b59d0188c0e838178235bf3d742df2b4c6a4 Mon Sep 17 00:00:00 2001 From: Nicholas Christensen Date: Mon, 1 Feb 2021 11:51:08 -0600 Subject: [PATCH 172/460] Update to use new islpy _laign_dim_type parameters --- loopy/transform/iname.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/loopy/transform/iname.py b/loopy/transform/iname.py index fbd0d57bc..d832adbd7 100644 --- a/loopy/transform/iname.py +++ b/loopy/transform/iname.py @@ -1975,6 +1975,16 @@ def process_set(s): for dt in dim_types for i in range(isl_map.dim(dt)) ] + aligned_map = _align_dim_type( + dim_type.param, + isl_map, map_with_s_domain, False, + map_names, s_names) + aligned_map = _align_dim_type( + dim_type.in_, + isl_map, map_with_s_domain, False, + map_names, s_names) + # Old code + """ aligned_map = _align_dim_type( dim_type.param, isl_map, map_with_s_domain, obj_bigger_ok=False, @@ -1983,7 +1993,7 @@ def process_set(s): dim_type.in_, isl_map, map_with_s_domain, obj_bigger_ok=False, obj_names=map_names, tgt_names=s_names) - + 
""" # }}} return aligned_map.intersect_domain(s).range() From 9cd492d409045473c97f62a78d814a3c62ad3790 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Thu, 25 Feb 2021 16:18:35 -0600 Subject: [PATCH 173/460] moved lex order map creation into schedule generation func to avoid duplicating logic when we start dealing with parallel loops and map dims for LID/GID tags --- loopy/schedule/checker/schedule.py | 37 ++++++++++-------------------- test/test_linearization_checker.py | 26 ++++++++++----------- 2 files changed, 25 insertions(+), 38 deletions(-) diff --git a/loopy/schedule/checker/schedule.py b/loopy/schedule/checker/schedule.py index a947da3ac..5221eecb3 100644 --- a/loopy/schedule/checker/schedule.py +++ b/loopy/schedule/checker/schedule.py @@ -277,6 +277,10 @@ def _get_map_for_stmt_inst(insn_id, lex_points, int_sid, out_names_sched): # Second, create pairwise schedules for each individual pair of insns + from loopy.schedule.checker.lexicographic_order_map import ( + create_lex_order_map, + ) + pairwise_schedules = {} for insn_ids in insn_id_pairs: lex_tuples = [stmt_instances[insn_id] for insn_id in insn_ids] @@ -310,30 +314,13 @@ def _get_map_for_stmt_inst(insn_id, lex_points, int_sid, out_names_sched): in zip(insn_ids, lex_tuples_simplified, int_sids) ] - pairwise_schedules[tuple(insn_ids)] = tuple(sched_maps) - - return pairwise_schedules - - -def get_lex_order_map_for_sched_space(schedule): - """Return an :class:`islpy.BasicMap` that maps each point in a - lexicographic ordering to every point that occurs later. + # TODO (moved func below up here to avoid passing extra info around) + # Benefit (e.g.): don't want to examine the schedule tuple in separate func + # below to re-determine which parallel + # dims are used. 
(could simplify everything by always using all dims, which + # would make maps more complex than necessary) + lex_order_map = create_lex_order_map(after_names=out_names_sched) - :arg schedule: A :class:`islpy.Map` representing the ordering of - statement instances as a mapping from statement instances to - lexicographic time. + pairwise_schedules[tuple(insn_ids)] = (tuple(sched_maps), lex_order_map) - :returns: An :class:`islpy.BasicMap` representing a lexicographic - ordering as a mapping from each point in lexicographic time - to every point that occurs later in lexicographic time, with - the dimension count and names matching the output dimension - of `schedule`. - - """ - - from loopy.schedule.checker.lexicographic_order_map import ( - create_lex_order_map, - ) - - lex_dim_names = schedule.space.get_var_names(isl.dim_type.out) - return create_lex_order_map(after_names=lex_dim_names) + return pairwise_schedules diff --git a/test/test_linearization_checker.py b/test/test_linearization_checker.py index 56882416b..c7683cb27 100644 --- a/test/test_linearization_checker.py +++ b/test/test_linearization_checker.py @@ -116,7 +116,8 @@ def _lex_space_string(dim_vals): # Relationship between insn_a and insn_b --------------------------------------- # Get two maps - sched_map_before, sched_map_after = sched_maps[("insn_a", "insn_b")] + (sched_map_before, sched_map_after), sched_lex_order_map = sched_maps[ + ("insn_a", "insn_b")] # Create expected maps, align, compare @@ -147,7 +148,8 @@ def _lex_space_string(dim_vals): # Relationship between insn_a and insn_c --------------------------------------- # Get two maps - sched_map_before, sched_map_after = sched_maps[("insn_a", "insn_c")] + (sched_map_before, sched_map_after), sched_lex_order_map = sched_maps[ + ("insn_a", "insn_c")] # Create expected maps, align, compare @@ -178,7 +180,8 @@ def _lex_space_string(dim_vals): # Relationship between insn_a and insn_d --------------------------------------- # Get two maps - 
sched_map_before, sched_map_after = sched_maps[("insn_a", "insn_d")] + (sched_map_before, sched_map_after), sched_lex_order_map = sched_maps[ + ("insn_a", "insn_d")] # Create expected maps, align, compare @@ -209,7 +212,8 @@ def _lex_space_string(dim_vals): # Relationship between insn_b and insn_c --------------------------------------- # Get two maps - sched_map_before, sched_map_after = sched_maps[("insn_b", "insn_c")] + (sched_map_before, sched_map_after), sched_lex_order_map = sched_maps[ + ("insn_b", "insn_c")] # Create expected maps, align, compare @@ -240,7 +244,8 @@ def _lex_space_string(dim_vals): # Relationship between insn_b and insn_d --------------------------------------- # Get two maps - sched_map_before, sched_map_after = sched_maps[("insn_b", "insn_d")] + (sched_map_before, sched_map_after), sched_lex_order_map = sched_maps[ + ("insn_b", "insn_d")] # Create expected maps, align, compare @@ -271,7 +276,8 @@ def _lex_space_string(dim_vals): # Relationship between insn_c and insn_d --------------------------------------- # Get two maps - sched_map_before, sched_map_after = sched_maps[("insn_c", "insn_d")] + (sched_map_before, sched_map_after), sched_lex_order_map = sched_maps[ + ("insn_c", "insn_d")] # Create expected maps, align, compare @@ -369,9 +375,6 @@ def test_statement_instance_ordering_creation(): from loopy.schedule.checker import ( get_schedules_for_statement_pairs, ) - from loopy.schedule.checker.schedule import ( - get_lex_order_map_for_sched_space, - ) from loopy.schedule.checker.utils import ( ensure_dim_names_match_and_align, append_marker_to_isl_map_var_names, @@ -441,12 +444,9 @@ def check_sio_for_insn_pair( ): # Get pairwise schedule - sched_map_before, sched_map_after = sched_maps[ + (sched_map_before, sched_map_after), sched_lex_order_map = sched_maps[ (insn_id_before, insn_id_after)] - # Get map representing lexicographic ordering - sched_lex_order_map = get_lex_order_map_for_sched_space(sched_map_before) - # Get expected lex 
order map expected_lex_order_map = create_lex_order_map( n_dims=expected_lex_dims, From bc748a171d19ffcbaa0c8dce4f63122ce2574344 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Thu, 25 Feb 2021 19:52:56 -0600 Subject: [PATCH 174/460] add create_elementwise_comparison_conjunction_set() function from child merge request; don't extract initial iteration in add_dims_to_isl_set() (why did I do this before??) --- loopy/schedule/checker/utils.py | 40 ++++++++++++++++++++++++++++++--- 1 file changed, 37 insertions(+), 3 deletions(-) diff --git a/loopy/schedule/checker/utils.py b/loopy/schedule/checker/utils.py index db1d861c8..4c42be861 100644 --- a/loopy/schedule/checker/utils.py +++ b/loopy/schedule/checker/utils.py @@ -31,9 +31,10 @@ def prettier_map_string(map_obj): def add_dims_to_isl_set(isl_set, dim_type, names, new_idx_start): new_set = isl_set.insert_dims( dim_type, new_idx_start, len(names) - ).set_dim_name(dim_type, new_idx_start, names[0]) - for i, name in enumerate(names[1:]): - new_set = new_set.set_dim_name(dim_type, new_idx_start+1+i, name) + ) + #.set_dim_name(dim_type, new_idx_start, names[0]) + for i, name in enumerate(names): + new_set = new_set.set_dim_name(dim_type, new_idx_start+i, name) return new_set @@ -250,3 +251,36 @@ def get_EnterLoop_inames(linearization_items): [item.iname, ] for item in linearization_items if isinstance(item, EnterLoop) ]) + + +def create_elementwise_comparison_conjunction_set( + names0, names1, islvars, op="eq"): + """Create a set constrained by the conjunction of conditions comparing + `names0` to `names1`. + + :arg names0: A list of :class:`str` representing variable names. + + :arg names1: A list of :class:`str` representing variable names. + + :arg islvars: A dictionary from variable names to :class:`islpy.PwAff` + instances that represent each of the variables + (islvars may be produced by `islpy.make_zero_and_vars`). The key + '0' is also include and represents a :class:`islpy.PwAff` zero constant. 
+ + :arg op: A :class:`str` describing the operator to use when creating + the set constraints. Options: `eq` for `=`, `lt` for `<` + + :returns: A set involving `islvars` cosntrained by the constraints + `{names0[0] names1[0] and names0[1] names1[1] and ...}`. + + """ + + # initialize set with constraint that is always true + conj_set = islvars[0].eq_set(islvars[0]) + for n0, n1 in zip(names0, names1): + if op == "eq": + conj_set = conj_set & islvars[n0].eq_set(islvars[n1]) + elif op == "lt": + conj_set = conj_set & islvars[n0].lt_set(islvars[n1]) + + return conj_set From 24c8b68d530b1f94c1af5ffe8d682bdaf8daeab5 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Thu, 25 Feb 2021 19:57:12 -0600 Subject: [PATCH 175/460] add dims to lex space for parallel (gid/lid) loops; in lex order map, require that corresponding parallel dims be equal; changed function signatures for get_lex_order_set() and create_lex_order_map() --- .../checker/lexicographic_order_map.py | 39 +++++- loopy/schedule/checker/schedule.py | 60 +++++++-- test/test_linearization_checker.py | 122 ++++++++++++++++-- 3 files changed, 191 insertions(+), 30 deletions(-) diff --git a/loopy/schedule/checker/lexicographic_order_map.py b/loopy/schedule/checker/lexicographic_order_map.py index d9066030f..144c20a8b 100644 --- a/loopy/schedule/checker/lexicographic_order_map.py +++ b/loopy/schedule/checker/lexicographic_order_map.py @@ -71,7 +71,12 @@ def get_statement_ordering_map( sio, isl.dim_type.in_, before_marker) -def get_lex_order_set(before_names, after_names, islvars=None): +def get_lex_order_set( + before_names, after_names, + before_names_concurrent=[], + after_names_concurrent=[], + islvars=None, + ): """Return an :class:`islpy.Set` representing a lexicographic ordering with the number of dimensions provided in `before_names` (equal to the number of dimensions in `after_names`). 
@@ -109,10 +114,17 @@ def get_lex_order_set(before_names, after_names, islvars=None): or (i0' = i0 and i1' = i1 and i2' < i2)} """ + # TODO update doc + + from loopy.schedule.checker.utils import ( + create_elementwise_comparison_conjunction_set, + ) # If no islvars passed, make them using the names provided if islvars is None: - islvars = isl.make_zero_and_vars(before_names+after_names, []) + islvars = isl.make_zero_and_vars( + before_names+after_names+before_names_concurrent+after_names_concurrent, + []) # Initialize set with constraint i0' < i0 lex_order_set = islvars[before_names[0]].lt_set(islvars[after_names[0]]) @@ -138,6 +150,12 @@ def get_lex_order_set(before_names, after_names, islvars=None): # Union this new constraint with the current lex_order_set lex_order_set = lex_order_set | full_conj_set + lex_order_set = lex_order_set & \ + create_elementwise_comparison_conjunction_set( + before_names_concurrent, after_names_concurrent, + islvars, op="eq", + ) + return lex_order_set @@ -145,6 +163,7 @@ def create_lex_order_map( n_dims=None, before_names=None, after_names=None, + after_names_concurrent=[], ): """Return a map from each point in a lexicographic ordering to every point that occurs later in the lexicographic ordering. @@ -174,25 +193,31 @@ def create_lex_order_map( or (i0' = i0 and i1' = i1 and i2' < i2)} """ + # TODO update doc + + from loopy.schedule.checker.utils import append_marker_to_strings if after_names is None: after_names = ["i%s" % (i) for i in range(n_dims)] if before_names is None: - from loopy.schedule.checker.utils import ( - append_marker_to_strings, - ) before_names = append_marker_to_strings(after_names, marker="'") if n_dims is None: n_dims = len(after_names) + before_names_concurrent = append_marker_to_strings( + after_names_concurrent, marker="'") assert len(before_names) == len(after_names) == n_dims dim_type = isl.dim_type # First, get a set representing the lexicographic ordering. 
- lex_order_set = get_lex_order_set(before_names, after_names) + lex_order_set = get_lex_order_set( + before_names, after_names, + before_names_concurrent, after_names_concurrent, + ) # Now convert that set to a map. lex_map = isl.Map.from_domain(lex_order_set) return lex_map.move_dims( dim_type.out, 0, dim_type.in_, - len(before_names), len(after_names)) + len(before_names) + len(before_names_concurrent), + len(after_names) + len(after_names_concurrent)) diff --git a/loopy/schedule/checker/schedule.py b/loopy/schedule/checker/schedule.py index 5221eecb3..5d3e0fa96 100644 --- a/loopy/schedule/checker/schedule.py +++ b/loopy/schedule/checker/schedule.py @@ -43,8 +43,14 @@ """ LIN_CHECK_IDENTIFIER_PREFIX = "_lp_linchk_" -LEX_VAR_PREFIX = "%sl" % (LIN_CHECK_IDENTIFIER_PREFIX) +LEX_VAR_PREFIX = "%slex" % (LIN_CHECK_IDENTIFIER_PREFIX) STATEMENT_VAR_NAME = "%sstmt" % (LIN_CHECK_IDENTIFIER_PREFIX) +# TODO document: +GTAG_VAR_NAMES = [] +LTAG_VAR_NAMES = [] +for par_level in [0, 1, 2]: + GTAG_VAR_NAMES.append("%sgid%d" % (LIN_CHECK_IDENTIFIER_PREFIX, par_level)) + LTAG_VAR_NAMES.append("%slid%d" % (LIN_CHECK_IDENTIFIER_PREFIX, par_level)) def _pad_tuple_with_zeros(tup, desired_length): @@ -142,8 +148,10 @@ def generate_pairwise_schedules( mappings from statement instances to lexicographic time, one for each of the two statements. 
""" + # TODO update doc from loopy.schedule import (EnterLoop, LeaveLoop, Barrier, RunInstruction) + from loopy.kernel.data import (LocalIndexTag, GroupIndexTag) all_insn_ids = set().union(*insn_id_pairs) @@ -233,13 +241,16 @@ def generate_pairwise_schedules( if len(stmt_instances.keys()) == len(all_insn_ids): break + # Second, create pairwise schedules for each individual pair of insns + from loopy.schedule.checker.utils import ( sorted_union_of_names_in_isl_sets, create_symbolic_map_from_tuples, add_dims_to_isl_set, ) - def _get_map_for_stmt_inst(insn_id, lex_points, int_sid, out_names_sched): + def _get_map_for_stmt( + insn_id, lex_points, int_sid, seq_lex_dim_names, conc_lex_dim_names): # Get inames domain for statement instance (a BasicSet) dom = knl.get_inames_domain( @@ -253,13 +264,15 @@ def _get_map_for_stmt_inst(insn_id, lex_points, int_sid, out_names_sched): in_names_sched = [STATEMENT_VAR_NAME] + dom_inames_ordered[:] sched_space = isl.Space.create_from_names( isl.DEFAULT_CONTEXT, - in_=in_names_sched, out=out_names_sched, params=[]) + in_=in_names_sched, + out=seq_lex_dim_names+conc_lex_dim_names, + params=[], + ) # Insert 'statement' dim into domain so that its space allows # for intersection with sched map later - dom_to_intersect = [ - add_dims_to_isl_set( - dom, isl.dim_type.set, [STATEMENT_VAR_NAME], 0), ] + dom_to_intersect = add_dims_to_isl_set( + dom, isl.dim_type.set, [STATEMENT_VAR_NAME], 0) # Each map will map statement instances -> lex time. # Right now, statement instance tuples consist of single int. 
@@ -271,11 +284,30 @@ def _get_map_for_stmt_inst(insn_id, lex_points, int_sid, out_names_sched): # Create map return create_symbolic_map_from_tuples( - tuple_pairs_with_domains=zip(tuple_pair, dom_to_intersect), + tuple_pairs_with_domains=zip(tuple_pair, [dom_to_intersect,]), space=sched_space, ) - # Second, create pairwise schedules for each individual pair of insns + # Get local/group axes for this kernel + l_axes_used = set() + g_axes_used = set() + for iname in knl.all_inames(): + ltag = knl.iname_tags_of_type(iname, LocalIndexTag) + if ltag: + assert len(ltag) == 1 # TODO always true? remove? + l_axes_used.add(ltag.pop().axis) + continue + gtag = knl.iname_tags_of_type(iname, GroupIndexTag) + if gtag: + assert len(gtag) == 1 # TODO always true? remove? + g_axes_used.add(gtag.pop().axis) + continue + conc_lex_dim_names = ( + [LTAG_VAR_NAMES[i] for i in sorted(l_axes_used)] + + [GTAG_VAR_NAMES[i] for i in sorted(g_axes_used)] + ) + # TODO (For now, using same loc/glob axes for for all pairwise + # schedules in this knl.) 
from loopy.schedule.checker.lexicographic_order_map import ( create_lex_order_map, @@ -299,8 +331,8 @@ def _get_map_for_stmt_inst(insn_id, lex_points, int_sid, out_names_sched): # Now generate maps from the blueprint -------------------------------------- - # Create names for the output dimensions - out_names_sched = [ + # Create names for the output dimensions for sequential loops + seq_lex_dim_names = [ LEX_VAR_PREFIX+str(i) for i in range(len(lex_tuples_simplified[0]))] # Determine integer IDs that will represent each statement in mapping @@ -309,7 +341,8 @@ def _get_map_for_stmt_inst(insn_id, lex_points, int_sid, out_names_sched): int_sids = [0, 0] if insn_ids[0] == insn_ids[1] else [0, 1] sched_maps = [ - _get_map_for_stmt_inst(insn_id, lex_tuple, int_sid, out_names_sched) + _get_map_for_stmt( + insn_id, lex_tuple, int_sid, seq_lex_dim_names, conc_lex_dim_names) for insn_id, lex_tuple, int_sid in zip(insn_ids, lex_tuples_simplified, int_sids) ] @@ -319,7 +352,10 @@ def _get_map_for_stmt_inst(insn_id, lex_points, int_sid, out_names_sched): # below to re-determine which parallel # dims are used. 
(could simplify everything by always using all dims, which # would make maps more complex than necessary) - lex_order_map = create_lex_order_map(after_names=out_names_sched) + lex_order_map = create_lex_order_map( + after_names=seq_lex_dim_names, + after_names_concurrent=conc_lex_dim_names, + ) pairwise_schedules[tuple(insn_ids)] = (tuple(sched_maps), lex_order_map) diff --git a/test/test_linearization_checker.py b/test/test_linearization_checker.py index c7683cb27..99bd39394 100644 --- a/test/test_linearization_checker.py +++ b/test/test_linearization_checker.py @@ -38,6 +38,8 @@ from loopy.schedule.checker.schedule import ( LEX_VAR_PREFIX, STATEMENT_VAR_NAME, + LTAG_VAR_NAMES, + GTAG_VAR_NAMES, ) logger = logging.getLogger(__name__) @@ -45,6 +47,18 @@ # {{{ test pairwise schedule creation +def _lex_space_string(dim_vals, lid_axes=[], gid_axes=[]): + # Return a string describing lex space dimension assignments + # (used to create maps below) + + lid_names = [LTAG_VAR_NAMES[i] for i in lid_axes] + gid_names = [GTAG_VAR_NAMES[i] for i in gid_axes] + + return ", ".join( + ["%s%d=%s" % (LEX_VAR_PREFIX, idx, str(val)) + for idx, val in enumerate(dim_vals)] + lid_names + gid_names) + + def test_pairwise_schedule_creation(): import islpy as isl from loopy.schedule.checker import ( @@ -88,16 +102,9 @@ def test_pairwise_schedule_creation(): knl = lp.prioritize_loops(knl, "i,j") # get a linearization - knl = preprocess_kernel(knl) - knl = get_one_linearized_kernel(knl) - linearization_items = knl.linearization - - def _lex_space_string(dim_vals): - # Return a string describing lex space dimension assignments - # (used to create maps below) - return ", ".join( - ["%s%d=%s" % (LEX_VAR_PREFIX, idx, str(val)) - for idx, val in enumerate(dim_vals)]) + proc_knl = preprocess_kernel(knl) + lin_knl = get_one_linearized_kernel(proc_knl) + linearization_items = lin_knl.linearization insn_id_pairs = [ ("insn_a", "insn_b"), @@ -108,7 +115,7 @@ def _lex_space_string(dim_vals): ("insn_c", 
"insn_d"), ] sched_maps = get_schedules_for_statement_pairs( - knl, + proc_knl, linearization_items, insn_id_pairs, ) @@ -304,6 +311,99 @@ def _lex_space_string(dim_vals): assert sched_map_before == sched_map_before_expected assert sched_map_after == sched_map_after_expected + +def test_pairwise_schedule_creation_parallel(): + import islpy as isl + from loopy.schedule.checker import ( + get_schedules_for_statement_pairs, + ) + from loopy.schedule.checker.utils import ( + ensure_dim_names_match_and_align, + ) + + # example kernel + knl = lp.make_kernel( + [ + "{[i]: 0<=itemp = b[i,k] {id=insn_a} + end + for j + for jj + a[i,j,jj] = temp + 1 {id=insn_b,dep=insn_a} + c[i,j,jj] = d[i,j,jj] {id=insn_c,dep=insn_b} + end + end + end + for t + e[t] = f[t] {id=insn_d, dep=insn_c} + end + """, + name="example", + assumptions="pi,pj,pk,pt >= 1", + ) + knl = lp.add_and_infer_dtypes( + knl, + {"b": np.float32, "d": np.float32, "f": np.float32}) + knl = lp.prioritize_loops(knl, "i,k") + knl = lp.tag_inames(knl, {"j": "l.1", "jj": "l.0", "t": "g.0"}) + + # get a linearization + proc_knl = preprocess_kernel(knl) + lin_knl = get_one_linearized_kernel(proc_knl) + linearization_items = lin_knl.linearization + + insn_id_pairs = [ + ("insn_a", "insn_b"), + ("insn_a", "insn_c"), + ("insn_a", "insn_d"), + ("insn_b", "insn_c"), + ("insn_b", "insn_d"), + ("insn_c", "insn_d"), + ] + sched_maps = get_schedules_for_statement_pairs( + proc_knl, + linearization_items, + insn_id_pairs, + ) + + # Relationship between insn_a and insn_b --------------------------------------- + + # Get two maps + (sched_map_before, sched_map_after), sched_lex_order_map = sched_maps[ + ("insn_a", "insn_b")] + + # Create expected maps, align, compare + + sched_map_before_expected = isl.Map( + "[pi, pk] -> { [%s=0, i, k] -> [%s] : 0 <= i < pi and 0 <= k < pk }" + % ( + STATEMENT_VAR_NAME, + _lex_space_string(["i", "0"], lid_axes=[0, 1], gid_axes=[0]), + ) + ) + sched_map_before_expected = 
ensure_dim_names_match_and_align( + sched_map_before_expected, sched_map_before) + + sched_map_after_expected = isl.Map( + "[pi, pj] -> { [%s=1, i, j, jj] -> [%s] : 0 <= i < pi and 0 <= j,jj < pj }" + % ( + STATEMENT_VAR_NAME, + _lex_space_string(["i", "1"], lid_axes=[0, 1], gid_axes=[0]), + ) + ) + sched_map_after_expected = ensure_dim_names_match_and_align( + sched_map_after_expected, sched_map_after) + + assert sched_map_before == sched_map_before_expected + assert sched_map_after == sched_map_after_expected + # }}} From ac6aec9bd634e69fac22bebf6286c547def25be2 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Thu, 25 Feb 2021 20:02:19 -0600 Subject: [PATCH 176/460] fix flake8 issue --- loopy/schedule/checker/schedule.py | 2 +- test/test_linearization_checker.py | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/loopy/schedule/checker/schedule.py b/loopy/schedule/checker/schedule.py index 5d3e0fa96..734d568c3 100644 --- a/loopy/schedule/checker/schedule.py +++ b/loopy/schedule/checker/schedule.py @@ -284,7 +284,7 @@ def _get_map_for_stmt( # Create map return create_symbolic_map_from_tuples( - tuple_pairs_with_domains=zip(tuple_pair, [dom_to_intersect,]), + tuple_pairs_with_domains=zip(tuple_pair, [dom_to_intersect, ]), space=sched_space, ) diff --git a/test/test_linearization_checker.py b/test/test_linearization_checker.py index 99bd39394..5787e8bcb 100644 --- a/test/test_linearization_checker.py +++ b/test/test_linearization_checker.py @@ -662,6 +662,8 @@ def check_sio_for_insn_pair( check_sio_for_insn_pair("insn_c", "insn_d", 1, expected_sio) +# TODO test SIO creation with parallel loops + # }}} From 41232897684b42a3877a8eac1767b39fb0b4dccf Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Sat, 27 Feb 2021 19:45:49 -0600 Subject: [PATCH 177/460] correct order of var names passed to isl.make_zero_and_vars() --- loopy/schedule/checker/lexicographic_order_map.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git 
a/loopy/schedule/checker/lexicographic_order_map.py b/loopy/schedule/checker/lexicographic_order_map.py index 144c20a8b..fb912cb7b 100644 --- a/loopy/schedule/checker/lexicographic_order_map.py +++ b/loopy/schedule/checker/lexicographic_order_map.py @@ -121,9 +121,10 @@ def get_lex_order_set( ) # If no islvars passed, make them using the names provided + # (make sure to pass var names in desired order of space dims) if islvars is None: islvars = isl.make_zero_and_vars( - before_names+after_names+before_names_concurrent+after_names_concurrent, + before_names+before_names_concurrent+after_names+after_names_concurrent, []) # Initialize set with constraint i0' < i0 From 35968a1f5ef3e5b1a48a1755b2c455241b9966f5 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Sat, 27 Feb 2021 19:49:08 -0600 Subject: [PATCH 178/460] reduce duplicated code by adding/improving helper functions; test lex map creation with parallel hw tags; more tests for schedule creation with parallel hw tags; improve variable naming a bit --- test/test_linearization_checker.py | 394 ++++++++++++++++++----------- 1 file changed, 243 insertions(+), 151 deletions(-) diff --git a/test/test_linearization_checker.py b/test/test_linearization_checker.py index 5787e8bcb..3d8e7203f 100644 --- a/test/test_linearization_checker.py +++ b/test/test_linearization_checker.py @@ -45,10 +45,22 @@ logger = logging.getLogger(__name__) -# {{{ test pairwise schedule creation +# {{{ helper functions for map creation/handling + +def _align_and_compare_maps(maps1, maps2): + from loopy.schedule.checker.utils import ( + ensure_dim_names_match_and_align, + ) -def _lex_space_string(dim_vals, lid_axes=[], gid_axes=[]): - # Return a string describing lex space dimension assignments + for map1, map2 in zip(maps1, maps2): + # Align maps and compare + map1_aligned = ensure_dim_names_match_and_align(map1, map2) + assert map1_aligned == map2 + + +def _lex_point_string(dim_vals, lid_axes=[], gid_axes=[]): + # Return a string describing a point 
in a lex space + # by assigning values to lex dimension variables # (used to create maps below) lid_names = [LTAG_VAR_NAMES[i] for i in lid_axes] @@ -58,17 +70,18 @@ def _lex_space_string(dim_vals, lid_axes=[], gid_axes=[]): ["%s%d=%s" % (LEX_VAR_PREFIX, idx, str(val)) for idx, val in enumerate(dim_vals)] + lid_names + gid_names) +# }}} + + +# {{{ test pairwise schedule creation def test_pairwise_schedule_creation(): import islpy as isl from loopy.schedule.checker import ( get_schedules_for_statement_pairs, ) - from loopy.schedule.checker.utils import ( - ensure_dim_names_match_and_align, - ) - # example kernel + # Example kernel # insn_c depends on insn_b only to create deterministic order # insn_d depends on insn_c only to create deterministic order knl = lp.make_kernel( @@ -101,7 +114,7 @@ def test_pairwise_schedule_creation(): knl = lp.prioritize_loops(knl, "i,k") knl = lp.prioritize_loops(knl, "i,j") - # get a linearization + # Get a linearization proc_knl = preprocess_kernel(knl) lin_knl = get_one_linearized_kernel(proc_knl) linearization_items = lin_knl.linearization @@ -126,30 +139,28 @@ def test_pairwise_schedule_creation(): (sched_map_before, sched_map_after), sched_lex_order_map = sched_maps[ ("insn_a", "insn_b")] - # Create expected maps, align, compare + # Create expected maps and compare sched_map_before_expected = isl.Map( "[pi, pk] -> { [%s=0, i, k] -> [%s] : 0 <= i < pi and 0 <= k < pk }" % ( STATEMENT_VAR_NAME, - _lex_space_string(["i", "0"]), + _lex_point_string(["i", "0"]), ) ) - sched_map_before_expected = ensure_dim_names_match_and_align( - sched_map_before_expected, sched_map_before) sched_map_after_expected = isl.Map( "[pi, pj] -> { [%s=1, i, j] -> [%s] : 0 <= i < pi and 0 <= j < pj }" % ( STATEMENT_VAR_NAME, - _lex_space_string(["i", "1"]), + _lex_point_string(["i", "1"]), ) ) - sched_map_after_expected = ensure_dim_names_match_and_align( - sched_map_after_expected, sched_map_after) - assert sched_map_before == sched_map_before_expected - 
assert sched_map_after == sched_map_after_expected + _align_and_compare_maps( + [sched_map_before_expected, sched_map_after_expected], + [sched_map_before, sched_map_after], + ) # ------------------------------------------------------------------------------ # Relationship between insn_a and insn_c --------------------------------------- @@ -158,30 +169,28 @@ def test_pairwise_schedule_creation(): (sched_map_before, sched_map_after), sched_lex_order_map = sched_maps[ ("insn_a", "insn_c")] - # Create expected maps, align, compare + # Create expected maps and compare sched_map_before_expected = isl.Map( "[pi, pk] -> { [%s=0, i, k] -> [%s] : 0 <= i < pi and 0 <= k < pk }" % ( STATEMENT_VAR_NAME, - _lex_space_string(["i", "0"]), + _lex_point_string(["i", "0"]), ) ) - sched_map_before_expected = ensure_dim_names_match_and_align( - sched_map_before_expected, sched_map_before) sched_map_after_expected = isl.Map( "[pi, pj] -> { [%s=1, i, j] -> [%s] : 0 <= i < pi and 0 <= j < pj }" % ( STATEMENT_VAR_NAME, - _lex_space_string(["i", "1"]), + _lex_point_string(["i", "1"]), ) ) - sched_map_after_expected = ensure_dim_names_match_and_align( - sched_map_after_expected, sched_map_after) - assert sched_map_before == sched_map_before_expected - assert sched_map_after == sched_map_after_expected + _align_and_compare_maps( + [sched_map_before_expected, sched_map_after_expected], + [sched_map_before, sched_map_after], + ) # ------------------------------------------------------------------------------ # Relationship between insn_a and insn_d --------------------------------------- @@ -190,30 +199,28 @@ def test_pairwise_schedule_creation(): (sched_map_before, sched_map_after), sched_lex_order_map = sched_maps[ ("insn_a", "insn_d")] - # Create expected maps, align, compare + # Create expected maps and compare sched_map_before_expected = isl.Map( "[pi, pk] -> { [%s=0, i, k] -> [%s] : 0 <= i < pi and 0 <= k < pk }" % ( STATEMENT_VAR_NAME, - _lex_space_string([0, ]), + 
_lex_point_string([0, ]), ) ) - sched_map_before_expected = ensure_dim_names_match_and_align( - sched_map_before_expected, sched_map_before) sched_map_after_expected = isl.Map( "[pt] -> { [%s=1, t] -> [%s] : 0 <= t < pt }" % ( STATEMENT_VAR_NAME, - _lex_space_string([1, ]), + _lex_point_string([1, ]), ) ) - sched_map_after_expected = ensure_dim_names_match_and_align( - sched_map_after_expected, sched_map_after) - assert sched_map_before == sched_map_before_expected - assert sched_map_after == sched_map_after_expected + _align_and_compare_maps( + [sched_map_before_expected, sched_map_after_expected], + [sched_map_before, sched_map_after], + ) # ------------------------------------------------------------------------------ # Relationship between insn_b and insn_c --------------------------------------- @@ -222,30 +229,28 @@ def test_pairwise_schedule_creation(): (sched_map_before, sched_map_after), sched_lex_order_map = sched_maps[ ("insn_b", "insn_c")] - # Create expected maps, align, compare + # Create expected maps and compare sched_map_before_expected = isl.Map( "[pi, pj] -> { [%s=0, i, j] -> [%s] : 0 <= i < pi and 0 <= j < pj }" % ( STATEMENT_VAR_NAME, - _lex_space_string(["i", "j", 0]), + _lex_point_string(["i", "j", 0]), ) ) - sched_map_before_expected = ensure_dim_names_match_and_align( - sched_map_before_expected, sched_map_before) sched_map_after_expected = isl.Map( "[pi, pj] -> { [%s=1, i, j] -> [%s] : 0 <= i < pi and 0 <= j < pj }" % ( STATEMENT_VAR_NAME, - _lex_space_string(["i", "j", 1]), + _lex_point_string(["i", "j", 1]), ) ) - sched_map_after_expected = ensure_dim_names_match_and_align( - sched_map_after_expected, sched_map_after) - assert sched_map_before == sched_map_before_expected - assert sched_map_after == sched_map_after_expected + _align_and_compare_maps( + [sched_map_before_expected, sched_map_after_expected], + [sched_map_before, sched_map_after], + ) # ------------------------------------------------------------------------------ # 
Relationship between insn_b and insn_d --------------------------------------- @@ -254,30 +259,28 @@ def test_pairwise_schedule_creation(): (sched_map_before, sched_map_after), sched_lex_order_map = sched_maps[ ("insn_b", "insn_d")] - # Create expected maps, align, compare + # Create expected maps and compare sched_map_before_expected = isl.Map( "[pi, pj] -> { [%s=0, i, j] -> [%s] : 0 <= i < pi and 0 <= j < pj }" % ( STATEMENT_VAR_NAME, - _lex_space_string([0, ]), + _lex_point_string([0, ]), ) ) - sched_map_before_expected = ensure_dim_names_match_and_align( - sched_map_before_expected, sched_map_before) sched_map_after_expected = isl.Map( "[pt] -> { [%s=1, t] -> [%s] : 0 <= t < pt }" % ( STATEMENT_VAR_NAME, - _lex_space_string([1, ]), + _lex_point_string([1, ]), ) ) - sched_map_after_expected = ensure_dim_names_match_and_align( - sched_map_after_expected, sched_map_after) - assert sched_map_before == sched_map_before_expected - assert sched_map_after == sched_map_after_expected + _align_and_compare_maps( + [sched_map_before_expected, sched_map_after_expected], + [sched_map_before, sched_map_after], + ) # ------------------------------------------------------------------------------ # Relationship between insn_c and insn_d --------------------------------------- @@ -286,42 +289,37 @@ def test_pairwise_schedule_creation(): (sched_map_before, sched_map_after), sched_lex_order_map = sched_maps[ ("insn_c", "insn_d")] - # Create expected maps, align, compare + # Create expected maps and compare sched_map_before_expected = isl.Map( "[pi, pj] -> { [%s=0, i, j] -> [%s] : 0 <= i < pi and 0 <= j < pj }" % ( STATEMENT_VAR_NAME, - _lex_space_string([0, ]), + _lex_point_string([0, ]), ) ) - sched_map_before_expected = ensure_dim_names_match_and_align( - sched_map_before_expected, sched_map_before) sched_map_after_expected = isl.Map( "[pt] -> { [%s=1, t] -> [%s] : 0 <= t < pt }" % ( STATEMENT_VAR_NAME, - _lex_space_string([1, ]), + _lex_point_string([1, ]), ) ) - 
sched_map_after_expected = ensure_dim_names_match_and_align( - sched_map_after_expected, sched_map_after) - assert sched_map_before == sched_map_before_expected - assert sched_map_after == sched_map_after_expected + _align_and_compare_maps( + [sched_map_before_expected, sched_map_after_expected], + [sched_map_before, sched_map_after], + ) -def test_pairwise_schedule_creation_parallel(): +def test_pairwise_schedule_creation_with_hw_par_tags(): import islpy as isl from loopy.schedule.checker import ( get_schedules_for_statement_pairs, ) - from loopy.schedule.checker.utils import ( - ensure_dim_names_match_and_align, - ) - # example kernel + # Example kernel knl = lp.make_kernel( [ "{[i]: 0<=i { [%s=0, i, k] -> [%s] : 0 <= i < pi and 0 <= k < pk }" % ( STATEMENT_VAR_NAME, - _lex_space_string(["i", "0"], lid_axes=[0, 1], gid_axes=[0]), + _lex_point_string(["i", "0"], lid_axes=[0, 1], gid_axes=[0]), ) ) - sched_map_before_expected = ensure_dim_names_match_and_align( - sched_map_before_expected, sched_map_before) sched_map_after_expected = isl.Map( "[pi, pj] -> { [%s=1, i, j, jj] -> [%s] : 0 <= i < pi and 0 <= j,jj < pj }" % ( STATEMENT_VAR_NAME, - _lex_space_string(["i", "1"], lid_axes=[0, 1], gid_axes=[0]), + _lex_point_string(["i", "1"], lid_axes=[0, 1], gid_axes=[0]), + ) + ) + + _align_and_compare_maps( + [sched_map_before_expected, sched_map_after_expected], + [sched_map_before, sched_map_after], + ) + + # ------------------------------------------------------------------------------ + # Relationship between insn_a and insn_d --------------------------------------- + + # Get two maps + (sched_map_before, sched_map_after), sched_lex_order_map = sched_maps[ + ("insn_a", "insn_d")] + + # Create expected maps and compare + + sched_map_before_expected = isl.Map( + "[pi, pk] -> { [%s=0, i, k] -> [%s] : 0 <= i < pi and 0 <= k < pk }" + % ( + STATEMENT_VAR_NAME, + _lex_point_string([0, ], lid_axes=[0, 1], gid_axes=[0]), ) ) - sched_map_after_expected = 
ensure_dim_names_match_and_align( - sched_map_after_expected, sched_map_after) - assert sched_map_before == sched_map_before_expected - assert sched_map_after == sched_map_after_expected + sched_map_after_expected = isl.Map( + "[pt] -> { [%s=1, t] -> [%s] : 0 <= t < pt }" + % ( + STATEMENT_VAR_NAME, + _lex_point_string([1, ], lid_axes=[0, 1], gid_axes=[0]), + ) + ) + + _align_and_compare_maps( + [sched_map_before_expected, sched_map_after_expected], + [sched_map_before, sched_map_after], + ) + + # ------------------------------------------------------------------------------ + # Relationship between insn_b and insn_d --------------------------------------- + + # Get two maps + (sched_map_before, sched_map_after), sched_lex_order_map = sched_maps[ + ("insn_b", "insn_d")] + + # Create expected maps and compare + + sched_map_before_expected = isl.Map( + "[pi, pj] -> { [%s=0, i, j, jj] -> [%s] : 0 <= i < pi and 0 <= j,jj < pj }" + % ( + STATEMENT_VAR_NAME, + _lex_point_string([0, ], lid_axes=[0, 1], gid_axes=[0]), + ) + ) + + sched_map_after_expected = isl.Map( + "[pt] -> { [%s=1, t] -> [%s] : 0 <= t < pt }" + % ( + STATEMENT_VAR_NAME, + _lex_point_string([1, ], lid_axes=[0, 1], gid_axes=[0]), + ) + ) + + _align_and_compare_maps( + [sched_map_before_expected, sched_map_after_expected], + [sched_map_before, sched_map_after], + ) + + # ------------------------------------------------------------------------------ # }}} @@ -418,7 +470,9 @@ def test_lex_order_map_creation(): append_marker_to_isl_map_var_names, ) - def _check_lex_map(expected_lex_order_map, n_dims): + def _check_lex_map( + expected_lex_order_map, n_dims, lid_axes_used=[], gid_axes_used=[]): + # Isl ignores the apostrophes, so explicitly add them expected_lex_order_map = append_marker_to_isl_map_var_names( expected_lex_order_map, isl.dim_type.in_, "'") @@ -427,6 +481,9 @@ def _check_lex_map(expected_lex_order_map, n_dims): n_dims=n_dims, before_names=["%s%d'" % (LEX_VAR_PREFIX, i) for i in range(n_dims)], 
after_names=["%s%d" % (LEX_VAR_PREFIX, i) for i in range(n_dims)], + after_names_concurrent=[ + LTAG_VAR_NAMES[i] for i in lid_axes_used] + [ + GTAG_VAR_NAMES[i] for i in gid_axes_used], ) assert lex_order_map == expected_lex_order_map @@ -465,26 +522,95 @@ def _check_lex_map(expected_lex_order_map, n_dims): _check_lex_map(expected_lex_order_map, 1) + # Lex map for kernel with parallel HW tags + + lid_axes_used = [0, 1] + gid_axes_used = [0, 1, 2] + hw_par_lex_vars = [ + LTAG_VAR_NAMES[i] for i in lid_axes_used] + [ + GTAG_VAR_NAMES[i] for i in gid_axes_used] + expected_lex_order_map = isl.Map( + "{{ " + "[{0}0', {0}1', {0}2', {1}', {2}', {3}', {4}', {5}'] " + "-> [{0}0, {0}1, {0}2, {1}, {2}, {3}, {4}, {5}] :" + "((" + "{0}0' < {0}0 " + ") or (" + "{0}0'={0}0 and {0}1' < {0}1 " + ") or (" + "{0}0'={0}0 and {0}1'={0}1 and {0}2' < {0}2 " + ")) and (" + "{1}' = {1} and {2}' = {2} and {3}' = {3} and {4}' = {4} and {5}' = {5}" + ")" + "}}".format(LEX_VAR_PREFIX, *hw_par_lex_vars)) + + _check_lex_map( + expected_lex_order_map, 3, + lid_axes_used=lid_axes_used, gid_axes_used=gid_axes_used) + # }}} # {{{ test statement instance ordering creation +def _check_sio_for_stmt_pair( + expected_sio, + stmt_id_before, + stmt_id_after, + sched_maps, + expected_seq_lex_dims, + lid_axes_used=[], + gid_axes_used=[], + ): + from loopy.schedule.checker.lexicographic_order_map import ( + get_statement_ordering_map, + create_lex_order_map, + ) + from loopy.schedule.checker.utils import ( + ensure_dim_names_match_and_align, + ) + + # Get pairwise schedule + (sched_map_before, sched_map_after), sched_lex_order_map = sched_maps[ + (stmt_id_before, stmt_id_after)] + + # Get expected lex order map + expected_lex_order_map = create_lex_order_map( + n_dims=expected_seq_lex_dims, + before_names=["%s%d'" % (LEX_VAR_PREFIX, i) + for i in range(expected_seq_lex_dims)], + after_names=["%s%d" % (LEX_VAR_PREFIX, i) + for i in range(expected_seq_lex_dims)], + after_names_concurrent=[ + LTAG_VAR_NAMES[i] 
for i in lid_axes_used] + [ + GTAG_VAR_NAMES[i] for i in gid_axes_used], + ) + + assert sched_lex_order_map == expected_lex_order_map + + # Create statement instance ordering, + # maps each statement instance to all statement instances occuring later + sio = get_statement_ordering_map( + sched_map_before, + sched_map_after, + sched_lex_order_map, + ) + + sio_aligned = ensure_dim_names_match_and_align(sio, expected_sio) + + assert sio_aligned == expected_sio + + def test_statement_instance_ordering_creation(): import islpy as isl from loopy.schedule.checker import ( get_schedules_for_statement_pairs, ) from loopy.schedule.checker.utils import ( - ensure_dim_names_match_and_align, append_marker_to_isl_map_var_names, ) - from loopy.schedule.checker.lexicographic_order_map import ( - get_statement_ordering_map, - create_lex_order_map, - ) - # example kernel (add deps to fix loop order) + # Example kernel (add deps to fix loop order) knl = lp.make_kernel( [ "{[i]: 0<=itemp = b[i,k] {id=insn_a} + <>temp = b[i,k] {id=stmt_a} end for j - a[i,j] = temp + 1 {id=insn_b,dep=insn_a} - c[i,j] = d[i,j] {id=insn_c,dep=insn_b} + a[i,j] = temp + 1 {id=stmt_b,dep=stmt_a} + c[i,j] = d[i,j] {id=stmt_c,dep=stmt_b} end end for t - e[t] = f[t] {id=insn_d, dep=insn_c} + e[t] = f[t] {id=stmt_d, dep=stmt_c} end """, name="example", @@ -516,61 +642,27 @@ def test_statement_instance_ordering_creation(): knl = lp.prioritize_loops(knl, "i,k") knl = lp.prioritize_loops(knl, "i,j") - # get a linearization + # Get a linearization knl = preprocess_kernel(knl) knl = get_one_linearized_kernel(knl) linearization_items = knl.linearization # Get pairwise schedules - insn_id_pairs = [ - ("insn_a", "insn_b"), - ("insn_a", "insn_c"), - ("insn_a", "insn_d"), - ("insn_b", "insn_c"), - ("insn_b", "insn_d"), - ("insn_c", "insn_d"), + stmt_id_pairs = [ + ("stmt_a", "stmt_b"), + ("stmt_a", "stmt_c"), + ("stmt_a", "stmt_d"), + ("stmt_b", "stmt_c"), + ("stmt_b", "stmt_d"), + ("stmt_c", "stmt_d"), ] sched_maps = 
get_schedules_for_statement_pairs( knl, linearization_items, - insn_id_pairs, + stmt_id_pairs, ) - def check_sio_for_insn_pair( - insn_id_before, - insn_id_after, - expected_lex_dims, - expected_sio, - ): - - # Get pairwise schedule - (sched_map_before, sched_map_after), sched_lex_order_map = sched_maps[ - (insn_id_before, insn_id_after)] - - # Get expected lex order map - expected_lex_order_map = create_lex_order_map( - n_dims=expected_lex_dims, - before_names=["%s%d'" % (LEX_VAR_PREFIX, i) - for i in range(expected_lex_dims)], - after_names=["%s%d" % (LEX_VAR_PREFIX, i) - for i in range(expected_lex_dims)], - ) - - assert sched_lex_order_map == expected_lex_order_map - - # create statement instance ordering, - # maps each statement instance to all statement instances occuring later - sio = get_statement_ordering_map( - sched_map_before, - sched_map_after, - sched_lex_order_map, - ) - - sio_aligned = ensure_dim_names_match_and_align(sio, expected_sio) - - assert sio_aligned == expected_sio - - # Relationship between insn_a and insn_b --------------------------------------- + # Relationship between stmt_a and stmt_b --------------------------------------- expected_sio = isl.Map( "[pi, pj, pk] -> {{ " @@ -584,9 +676,9 @@ def check_sio_for_insn_pair( expected_sio = append_marker_to_isl_map_var_names( expected_sio, isl.dim_type.in_, "'") - check_sio_for_insn_pair("insn_a", "insn_b", 2, expected_sio) + _check_sio_for_stmt_pair(expected_sio, "stmt_a", "stmt_b", sched_maps, 2) - # Relationship between insn_a and insn_c --------------------------------------- + # Relationship between stmt_a and stmt_c --------------------------------------- expected_sio = isl.Map( "[pi, pj, pk] -> {{ " @@ -600,9 +692,9 @@ def check_sio_for_insn_pair( expected_sio = append_marker_to_isl_map_var_names( expected_sio, isl.dim_type.in_, "'") - check_sio_for_insn_pair("insn_a", "insn_c", 2, expected_sio) + _check_sio_for_stmt_pair(expected_sio, "stmt_a", "stmt_c", sched_maps, 2) - # 
Relationship between insn_a and insn_d --------------------------------------- + # Relationship between stmt_a and stmt_d --------------------------------------- expected_sio = isl.Map( "[pt, pi, pk] -> {{ " @@ -614,9 +706,9 @@ def check_sio_for_insn_pair( expected_sio = append_marker_to_isl_map_var_names( expected_sio, isl.dim_type.in_, "'") - check_sio_for_insn_pair("insn_a", "insn_d", 1, expected_sio) + _check_sio_for_stmt_pair(expected_sio, "stmt_a", "stmt_d", sched_maps, 1) - # Relationship between insn_b and insn_c --------------------------------------- + # Relationship between stmt_b and stmt_c --------------------------------------- expected_sio = isl.Map( "[pi, pj] -> {{ " @@ -632,9 +724,9 @@ def check_sio_for_insn_pair( expected_sio = append_marker_to_isl_map_var_names( expected_sio, isl.dim_type.in_, "'") - check_sio_for_insn_pair("insn_b", "insn_c", 3, expected_sio) + _check_sio_for_stmt_pair(expected_sio, "stmt_b", "stmt_c", sched_maps, 3) - # Relationship between insn_b and insn_d --------------------------------------- + # Relationship between stmt_b and stmt_d --------------------------------------- expected_sio = isl.Map( "[pt, pi, pj] -> {{ " @@ -646,9 +738,9 @@ def check_sio_for_insn_pair( expected_sio = append_marker_to_isl_map_var_names( expected_sio, isl.dim_type.in_, "'") - check_sio_for_insn_pair("insn_b", "insn_d", 1, expected_sio) + _check_sio_for_stmt_pair(expected_sio, "stmt_b", "stmt_d", sched_maps, 1) - # Relationship between insn_c and insn_d --------------------------------------- + # Relationship between stmt_c and stmt_d --------------------------------------- expected_sio = isl.Map( "[pt, pi, pj] -> {{ " @@ -660,7 +752,7 @@ def check_sio_for_insn_pair( expected_sio = append_marker_to_isl_map_var_names( expected_sio, isl.dim_type.in_, "'") - check_sio_for_insn_pair("insn_c", "insn_d", 1, expected_sio) + _check_sio_for_stmt_pair(expected_sio, "stmt_c", "stmt_d", sched_maps, 1) # TODO test SIO creation with parallel loops From 
fc9576de00122739ffedd91983fef14ce003e69f Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Sat, 27 Feb 2021 19:55:43 -0600 Subject: [PATCH 179/460] stop checking lex map accuracy in _check_sio_for_stmt_pair() (it's already tested separately, and may not be returned with schedule maps later) --- test/test_linearization_checker.py | 30 ++++++------------------------ 1 file changed, 6 insertions(+), 24 deletions(-) diff --git a/test/test_linearization_checker.py b/test/test_linearization_checker.py index 3d8e7203f..22b106eae 100644 --- a/test/test_linearization_checker.py +++ b/test/test_linearization_checker.py @@ -558,13 +558,9 @@ def _check_sio_for_stmt_pair( stmt_id_before, stmt_id_after, sched_maps, - expected_seq_lex_dims, - lid_axes_used=[], - gid_axes_used=[], ): from loopy.schedule.checker.lexicographic_order_map import ( get_statement_ordering_map, - create_lex_order_map, ) from loopy.schedule.checker.utils import ( ensure_dim_names_match_and_align, @@ -574,20 +570,6 @@ def _check_sio_for_stmt_pair( (sched_map_before, sched_map_after), sched_lex_order_map = sched_maps[ (stmt_id_before, stmt_id_after)] - # Get expected lex order map - expected_lex_order_map = create_lex_order_map( - n_dims=expected_seq_lex_dims, - before_names=["%s%d'" % (LEX_VAR_PREFIX, i) - for i in range(expected_seq_lex_dims)], - after_names=["%s%d" % (LEX_VAR_PREFIX, i) - for i in range(expected_seq_lex_dims)], - after_names_concurrent=[ - LTAG_VAR_NAMES[i] for i in lid_axes_used] + [ - GTAG_VAR_NAMES[i] for i in gid_axes_used], - ) - - assert sched_lex_order_map == expected_lex_order_map - # Create statement instance ordering, # maps each statement instance to all statement instances occuring later sio = get_statement_ordering_map( @@ -676,7 +658,7 @@ def test_statement_instance_ordering_creation(): expected_sio = append_marker_to_isl_map_var_names( expected_sio, isl.dim_type.in_, "'") - _check_sio_for_stmt_pair(expected_sio, "stmt_a", "stmt_b", sched_maps, 2) + 
_check_sio_for_stmt_pair(expected_sio, "stmt_a", "stmt_b", sched_maps) # Relationship between stmt_a and stmt_c --------------------------------------- @@ -692,7 +674,7 @@ def test_statement_instance_ordering_creation(): expected_sio = append_marker_to_isl_map_var_names( expected_sio, isl.dim_type.in_, "'") - _check_sio_for_stmt_pair(expected_sio, "stmt_a", "stmt_c", sched_maps, 2) + _check_sio_for_stmt_pair(expected_sio, "stmt_a", "stmt_c", sched_maps) # Relationship between stmt_a and stmt_d --------------------------------------- @@ -706,7 +688,7 @@ def test_statement_instance_ordering_creation(): expected_sio = append_marker_to_isl_map_var_names( expected_sio, isl.dim_type.in_, "'") - _check_sio_for_stmt_pair(expected_sio, "stmt_a", "stmt_d", sched_maps, 1) + _check_sio_for_stmt_pair(expected_sio, "stmt_a", "stmt_d", sched_maps) # Relationship between stmt_b and stmt_c --------------------------------------- @@ -724,7 +706,7 @@ def test_statement_instance_ordering_creation(): expected_sio = append_marker_to_isl_map_var_names( expected_sio, isl.dim_type.in_, "'") - _check_sio_for_stmt_pair(expected_sio, "stmt_b", "stmt_c", sched_maps, 3) + _check_sio_for_stmt_pair(expected_sio, "stmt_b", "stmt_c", sched_maps) # Relationship between stmt_b and stmt_d --------------------------------------- @@ -738,7 +720,7 @@ def test_statement_instance_ordering_creation(): expected_sio = append_marker_to_isl_map_var_names( expected_sio, isl.dim_type.in_, "'") - _check_sio_for_stmt_pair(expected_sio, "stmt_b", "stmt_d", sched_maps, 1) + _check_sio_for_stmt_pair(expected_sio, "stmt_b", "stmt_d", sched_maps) # Relationship between stmt_c and stmt_d --------------------------------------- @@ -752,7 +734,7 @@ def test_statement_instance_ordering_creation(): expected_sio = append_marker_to_isl_map_var_names( expected_sio, isl.dim_type.in_, "'") - _check_sio_for_stmt_pair(expected_sio, "stmt_c", "stmt_d", sched_maps, 1) + _check_sio_for_stmt_pair(expected_sio, "stmt_c", "stmt_d", 
sched_maps) # TODO test SIO creation with parallel loops From fbba3478215aaac811accc151acd29ea514bfe4c Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Sat, 27 Feb 2021 21:34:09 -0600 Subject: [PATCH 180/460] started work on test for SIO with parallel kernel (commented out for now; dealing with issue found) --- loopy/schedule/checker/schedule.py | 4 + test/test_linearization_checker.py | 145 ++++++++++++++++++++++++++++- 2 files changed, 145 insertions(+), 4 deletions(-) diff --git a/loopy/schedule/checker/schedule.py b/loopy/schedule/checker/schedule.py index 734d568c3..1552449a1 100644 --- a/loopy/schedule/checker/schedule.py +++ b/loopy/schedule/checker/schedule.py @@ -269,6 +269,10 @@ def _get_map_for_stmt( params=[], ) + # TODO Either set inames equal to relevant gid/lid var names + # or replace inames with gid/lid var names... + # (otherwise gid/lid conditions will be lost in SIO composition) + # Insert 'statement' dim into domain so that its space allows # for intersection with sched map later dom_to_intersect = add_dims_to_isl_set( diff --git a/test/test_linearization_checker.py b/test/test_linearization_checker.py index 22b106eae..bf3f1c0ce 100644 --- a/test/test_linearization_checker.py +++ b/test/test_linearization_checker.py @@ -128,7 +128,7 @@ def test_pairwise_schedule_creation(): ("insn_c", "insn_d"), ] sched_maps = get_schedules_for_statement_pairs( - proc_knl, + lin_knl, linearization_items, insn_id_pairs, ) @@ -360,7 +360,7 @@ def test_pairwise_schedule_creation_with_hw_par_tags(): ("insn_b", "insn_d"), ] sched_maps = get_schedules_for_statement_pairs( - proc_knl, + lin_knl, linearization_items, insn_id_pairs, ) @@ -583,7 +583,7 @@ def _check_sio_for_stmt_pair( assert sio_aligned == expected_sio -def test_statement_instance_ordering_creation(): +def test_statement_instance_ordering(): import islpy as isl from loopy.schedule.checker import ( get_schedules_for_statement_pairs, @@ -736,7 +736,144 @@ def test_statement_instance_ordering_creation(): 
_check_sio_for_stmt_pair(expected_sio, "stmt_c", "stmt_d", sched_maps) -# TODO test SIO creation with parallel loops + +''' +def test_statement_instance_ordering_with_hw_par_tags(): + import islpy as isl + from loopy.schedule.checker import ( + get_schedules_for_statement_pairs, + ) + from loopy.schedule.checker.utils import ( + append_marker_to_isl_map_var_names, + append_marker_to_strings, + ) + + # Example kernel + knl = lp.make_kernel( + [ + "{[i]: 0<=itemp = b[i,k] {id=stmt_a} + end + for j + for jj + a[i,j,jj] = temp + 1 {id=stmt_b,dep=stmt_a} + end + end + end + for t + e[t] = f[t] {id=stmt_d, dep=stmt_b} + end + """, + name="example", + assumptions="pi,pj,pk,pt >= 1", + lang_version=(2018, 2) + ) + knl = lp.add_and_infer_dtypes(knl, {"b": np.float32, "f": np.float32}) + knl = lp.prioritize_loops(knl, "i,k") + knl = lp.tag_inames(knl, {"j": "l.1", "jj": "l.0", "t": "g.0"}) + + # Get a linearization + proc_knl = preprocess_kernel(knl) + lin_knl = get_one_linearized_kernel(proc_knl) + linearization_items = lin_knl.linearization + + # Get pairwise schedules + stmt_id_pairs = [ + ("stmt_a", "stmt_b"), + ("stmt_a", "stmt_d"), + ("stmt_b", "stmt_d"), + ] + sched_maps = get_schedules_for_statement_pairs( + lin_knl, + linearization_items, + stmt_id_pairs, + ) + + # Create strings for representing hardware tag portions of sio maps + + # Get par tag names for this kernel + ltag_var_names = [LTAG_VAR_NAMES[lid] for lid in [0, 1]] + gtag_var_names = [GTAG_VAR_NAMES[gid] for gid in [0]] + + # Equality condition, e.g., "lid0' = lid0 and lid1' = lid1, and ..." 
+ par_tag_condition = " and ".join( + ["{0}' = {0}".format(ltag) for ltag in ltag_var_names] + + ["{0}' = {0}".format(gtag) for gtag in gtag_var_names] + ) + + # Comma separated dim names, e.g., "lid0', lid1', gid0'" + par_tag_var_names = ", ".join(ltag_var_names + gtag_var_names) + par_tag_var_names_prime = ", ".join( + append_marker_to_strings(ltag_var_names + gtag_var_names, "'")) + + # Relationship between stmt_a and stmt_b --------------------------------------- + + expected_sio = isl.Map( + "[pi, pj, pk] -> {{ " + "[{0}'=0, i', k', {1}] -> [{0}=1, i, j, {2}] : " + "0 <= i' < pi and 0 <= k' < pk and 0 <= j < pj and 0 <= i < pi and i > i' " + "and {3}; " + "[{0}'=0, i', k', {1}] -> [{0}=1, i=i', j, {2}] : " + "0 <= i' < pi and 0 <= k' < pk and 0 <= j < pj " + "and {3}" + "}}".format( + STATEMENT_VAR_NAME, + par_tag_var_names_prime, + par_tag_var_names, + par_tag_condition, + ) + ) + # isl ignores these apostrophes, so explicitly add them + expected_sio = append_marker_to_isl_map_var_names( + expected_sio, isl.dim_type.in_, "'") + + _check_sio_for_stmt_pair(expected_sio, "stmt_a", "stmt_b", sched_maps) + + # Relationship between stmt_a and stmt_d --------------------------------------- + + expected_sio = isl.Map( + "[pt, pi, pk] -> {{ " + "[{0}'=0, i', k', {1}] -> [{0}=1, t, {2}] : " + "0 <= i' < pi and 0 <= k' < pk and 0 <= t < pt and {3}" + "}}".format( + STATEMENT_VAR_NAME, + par_tag_var_names_prime, + par_tag_var_names, + par_tag_condition, + ) + ) + # isl ignores these apostrophes, so explicitly add them + expected_sio = append_marker_to_isl_map_var_names( + expected_sio, isl.dim_type.in_, "'") + + _check_sio_for_stmt_pair(expected_sio, "stmt_a", "stmt_d", sched_maps) + + # Relationship between stmt_b and stmt_d --------------------------------------- + + expected_sio = isl.Map( + "[pt, pi, pj] -> {{ " + "[{0}'=0, i', j', {1}] -> [{0}=1, t, {2}] : " + "0 <= i' < pi and 0 <= j' < pj and 0 <= t < pt and {3}" + "}}".format( + STATEMENT_VAR_NAME, + 
par_tag_var_names_prime, + par_tag_var_names, + par_tag_condition, + ) + ) + # isl ignores these apostrophes, so explicitly add them + expected_sio = append_marker_to_isl_map_var_names( + expected_sio, isl.dim_type.in_, "'") + + _check_sio_for_stmt_pair(expected_sio, "stmt_b", "stmt_d", sched_maps) +''' # }}} From f408c86bb432f3e951f6d9dacd76241aec8a4b63 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Wed, 3 Mar 2021 20:57:35 -0600 Subject: [PATCH 181/460] set parallel inames equal to corresponding gid/lid var names in schedules; add test for SIO creation with parallel inames; update other tests accordinglyly --- loopy/schedule/checker/schedule.py | 57 ++++---- test/test_linearization_checker.py | 224 +++++++---------------------- 2 files changed, 80 insertions(+), 201 deletions(-) diff --git a/loopy/schedule/checker/schedule.py b/loopy/schedule/checker/schedule.py index 1552449a1..c6f5e43e5 100644 --- a/loopy/schedule/checker/schedule.py +++ b/loopy/schedule/checker/schedule.py @@ -243,6 +243,31 @@ def generate_pairwise_schedules( # Second, create pairwise schedules for each individual pair of insns + # Get dim names representing local/group axes for this kernel, + # and get the dictionary that will be used later to create a + # constraint requiring {par inames == par axes} in sched + l_axes_used = set() + g_axes_used = set() + par_iname_constraint_dicts = [] + for iname in knl.all_inames(): + ltag = knl.iname_tags_of_type(iname, LocalIndexTag) + if ltag: + # assert len(ltag) == 1 # (should always be true) + ltag_var = LTAG_VAR_NAMES[ltag.pop().axis] + l_axes_used.add(ltag_var) + # Represent constraint 'iname = ltag_var' in par_iname_constraint_dicts: + par_iname_constraint_dicts.append({1: 0, iname: 1, ltag_var: -1}) + continue + gtag = knl.iname_tags_of_type(iname, GroupIndexTag) + if gtag: + # assert len(gtag) == 1 # (should always be true) + gtag_var = GTAG_VAR_NAMES[gtag.pop().axis] + g_axes_used.add(gtag_var) + # Represent constraint 'iname = gtag_var' in 
par_iname_constraint_dicts: + par_iname_constraint_dicts.append({1: 0, iname: 1, gtag_var: -1}) + continue + conc_lex_dim_names = sorted(l_axes_used) + sorted(g_axes_used) + from loopy.schedule.checker.utils import ( sorted_union_of_names_in_isl_sets, create_symbolic_map_from_tuples, @@ -269,10 +294,6 @@ def _get_map_for_stmt( params=[], ) - # TODO Either set inames equal to relevant gid/lid var names - # or replace inames with gid/lid var names... - # (otherwise gid/lid conditions will be lost in SIO composition) - # Insert 'statement' dim into domain so that its space allows # for intersection with sched map later dom_to_intersect = add_dims_to_isl_set( @@ -287,31 +308,17 @@ def _get_map_for_stmt( )] # Create map - return create_symbolic_map_from_tuples( + sched_map = create_symbolic_map_from_tuples( tuple_pairs_with_domains=zip(tuple_pair, [dom_to_intersect, ]), space=sched_space, ) - # Get local/group axes for this kernel - l_axes_used = set() - g_axes_used = set() - for iname in knl.all_inames(): - ltag = knl.iname_tags_of_type(iname, LocalIndexTag) - if ltag: - assert len(ltag) == 1 # TODO always true? remove? - l_axes_used.add(ltag.pop().axis) - continue - gtag = knl.iname_tags_of_type(iname, GroupIndexTag) - if gtag: - assert len(gtag) == 1 # TODO always true? remove? - g_axes_used.add(gtag.pop().axis) - continue - conc_lex_dim_names = ( - [LTAG_VAR_NAMES[i] for i in sorted(l_axes_used)] + - [GTAG_VAR_NAMES[i] for i in sorted(g_axes_used)] - ) - # TODO (For now, using same loc/glob axes for for all pairwise - # schedules in this knl.) 
+ # Set inames equal to relevant gid/lid var names + for constraint_dict in par_iname_constraint_dicts: + sched_map = sched_map.add_constraint( + isl.Constraint.eq_from_names(sched_map.space, constraint_dict)) + + return sched_map from loopy.schedule.checker.lexicographic_order_map import ( create_lex_order_map, diff --git a/test/test_linearization_checker.py b/test/test_linearization_checker.py index bf3f1c0ce..a1a8c6909 100644 --- a/test/test_linearization_checker.py +++ b/test/test_linearization_checker.py @@ -58,17 +58,19 @@ def _align_and_compare_maps(maps1, maps2): assert map1_aligned == map2 -def _lex_point_string(dim_vals, lid_axes=[], gid_axes=[]): +def _lex_point_string(dim_vals, lid_inames=[], gid_inames=[]): # Return a string describing a point in a lex space # by assigning values to lex dimension variables # (used to create maps below) - lid_names = [LTAG_VAR_NAMES[i] for i in lid_axes] - gid_names = [GTAG_VAR_NAMES[i] for i in gid_axes] - return ", ".join( ["%s%d=%s" % (LEX_VAR_PREFIX, idx, str(val)) - for idx, val in enumerate(dim_vals)] + lid_names + gid_names) + for idx, val in enumerate(dim_vals)] + + ["%s=%s" % (LTAG_VAR_NAMES[idx], iname) + for idx, iname in enumerate(lid_inames)] + + ["%s=%s" % (GTAG_VAR_NAMES[idx], iname) + for idx, iname in enumerate(gid_inames)] + ) # }}} @@ -322,130 +324,63 @@ def test_pairwise_schedule_creation_with_hw_par_tags(): # Example kernel knl = lp.make_kernel( [ - "{[i]: 0<=itemp = b[i,k] {id=insn_a} - end - for j - for jj - a[i,j,jj] = temp + 1 {id=insn_b,dep=insn_a} + for ii + for j + for jj + <>temp = b[i,ii,j,jj] {id=stmt_a} + a[i,ii,j,jj] = temp + 1 {id=stmt_b,dep=stmt_a} + end end end end - for t - e[t] = f[t] {id=insn_d, dep=insn_b} - end """, name="example", - assumptions="pi,pj,pk,pt >= 1", + assumptions="pi,pj >= 1", + lang_version=(2018, 2) ) - knl = lp.add_and_infer_dtypes(knl, {"b": np.float32, "f": np.float32}) - knl = lp.prioritize_loops(knl, "i,k") - knl = lp.tag_inames(knl, {"j": "l.1", "jj": 
"l.0", "t": "g.0"}) + knl = lp.add_and_infer_dtypes(knl, {"a": np.float32, "b": np.float32}) + knl = lp.tag_inames(knl, {"j": "l.1", "jj": "l.0", "i": "g.0"}) # Get a linearization proc_knl = preprocess_kernel(knl) lin_knl = get_one_linearized_kernel(proc_knl) linearization_items = lin_knl.linearization - insn_id_pairs = [ - ("insn_a", "insn_b"), - ("insn_a", "insn_d"), - ("insn_b", "insn_d"), + stmt_id_pairs = [ + ("stmt_a", "stmt_b"), ] sched_maps = get_schedules_for_statement_pairs( lin_knl, linearization_items, - insn_id_pairs, - ) - - # Relationship between insn_a and insn_b --------------------------------------- - - # Get two maps - (sched_map_before, sched_map_after), sched_lex_order_map = sched_maps[ - ("insn_a", "insn_b")] - - # Create expected maps and compare - - sched_map_before_expected = isl.Map( - "[pi, pk] -> { [%s=0, i, k] -> [%s] : 0 <= i < pi and 0 <= k < pk }" - % ( - STATEMENT_VAR_NAME, - _lex_point_string(["i", "0"], lid_axes=[0, 1], gid_axes=[0]), - ) - ) - - sched_map_after_expected = isl.Map( - "[pi, pj] -> { [%s=1, i, j, jj] -> [%s] : 0 <= i < pi and 0 <= j,jj < pj }" - % ( - STATEMENT_VAR_NAME, - _lex_point_string(["i", "1"], lid_axes=[0, 1], gid_axes=[0]), - ) - ) - - _align_and_compare_maps( - [sched_map_before_expected, sched_map_after_expected], - [sched_map_before, sched_map_after], - ) - - # ------------------------------------------------------------------------------ - # Relationship between insn_a and insn_d --------------------------------------- - - # Get two maps - (sched_map_before, sched_map_after), sched_lex_order_map = sched_maps[ - ("insn_a", "insn_d")] - - # Create expected maps and compare - - sched_map_before_expected = isl.Map( - "[pi, pk] -> { [%s=0, i, k] -> [%s] : 0 <= i < pi and 0 <= k < pk }" - % ( - STATEMENT_VAR_NAME, - _lex_point_string([0, ], lid_axes=[0, 1], gid_axes=[0]), - ) - ) - - sched_map_after_expected = isl.Map( - "[pt] -> { [%s=1, t] -> [%s] : 0 <= t < pt }" - % ( - STATEMENT_VAR_NAME, - 
_lex_point_string([1, ], lid_axes=[0, 1], gid_axes=[0]), - ) - ) - - _align_and_compare_maps( - [sched_map_before_expected, sched_map_after_expected], - [sched_map_before, sched_map_after], + stmt_id_pairs, ) - # ------------------------------------------------------------------------------ - # Relationship between insn_b and insn_d --------------------------------------- + # Relationship between stmt_a and stmt_b --------------------------------------- # Get two maps (sched_map_before, sched_map_after), sched_lex_order_map = sched_maps[ - ("insn_b", "insn_d")] + ("stmt_a", "stmt_b")] # Create expected maps and compare sched_map_before_expected = isl.Map( - "[pi, pj] -> { [%s=0, i, j, jj] -> [%s] : 0 <= i < pi and 0 <= j,jj < pj }" + "[pi,pj] -> {[%s=0,i,ii,j,jj] -> [%s] : 0 <= i,ii < pi and 0 <= j,jj < pj}" % ( STATEMENT_VAR_NAME, - _lex_point_string([0, ], lid_axes=[0, 1], gid_axes=[0]), + _lex_point_string(["ii", "0"], lid_inames=["jj", "j"], gid_inames=["i"]), ) ) sched_map_after_expected = isl.Map( - "[pt] -> { [%s=1, t] -> [%s] : 0 <= t < pt }" + "[pi,pj] -> {[%s=1,i,ii,j,jj] -> [%s] : 0 <= i,ii < pi and 0 <= j,jj < pj}" % ( STATEMENT_VAR_NAME, - _lex_point_string([1, ], lid_axes=[0, 1], gid_axes=[0]), + _lex_point_string(["ii", "1"], lid_inames=["jj", "j"], gid_inames=["i"]), ) ) @@ -737,7 +672,6 @@ def test_statement_instance_ordering(): _check_sio_for_stmt_pair(expected_sio, "stmt_c", "stmt_d", sched_maps) -''' def test_statement_instance_ordering_with_hw_par_tags(): import islpy as isl from loopy.schedule.checker import ( @@ -745,39 +679,33 @@ def test_statement_instance_ordering_with_hw_par_tags(): ) from loopy.schedule.checker.utils import ( append_marker_to_isl_map_var_names, - append_marker_to_strings, + partition_inames_by_concurrency, ) # Example kernel knl = lp.make_kernel( [ - "{[i]: 0<=itemp = b[i,k] {id=stmt_a} - end - for j - for jj - a[i,j,jj] = temp + 1 {id=stmt_b,dep=stmt_a} + for ii + for j + for jj + <>temp = b[i,ii,j,jj] {id=stmt_a} + 
a[i,ii,j,jj] = temp + 1 {id=stmt_b,dep=stmt_a} + end end end end - for t - e[t] = f[t] {id=stmt_d, dep=stmt_b} - end """, name="example", - assumptions="pi,pj,pk,pt >= 1", + assumptions="pi,pj >= 1", lang_version=(2018, 2) ) - knl = lp.add_and_infer_dtypes(knl, {"b": np.float32, "f": np.float32}) - knl = lp.prioritize_loops(knl, "i,k") - knl = lp.tag_inames(knl, {"j": "l.1", "jj": "l.0", "t": "g.0"}) + knl = lp.add_and_infer_dtypes(knl, {"a": np.float32, "b": np.float32}) + knl = lp.tag_inames(knl, {"j": "l.1", "jj": "l.0", "i": "g.0"}) # Get a linearization proc_knl = preprocess_kernel(knl) @@ -787,8 +715,6 @@ def test_statement_instance_ordering_with_hw_par_tags(): # Get pairwise schedules stmt_id_pairs = [ ("stmt_a", "stmt_b"), - ("stmt_a", "stmt_d"), - ("stmt_b", "stmt_d"), ] sched_maps = get_schedules_for_statement_pairs( lin_knl, @@ -796,38 +722,21 @@ def test_statement_instance_ordering_with_hw_par_tags(): stmt_id_pairs, ) - # Create strings for representing hardware tag portions of sio maps - - # Get par tag names for this kernel - ltag_var_names = [LTAG_VAR_NAMES[lid] for lid in [0, 1]] - gtag_var_names = [GTAG_VAR_NAMES[gid] for gid in [0]] - - # Equality condition, e.g., "lid0' = lid0 and lid1' = lid1, and ..." 
- par_tag_condition = " and ".join( - ["{0}' = {0}".format(ltag) for ltag in ltag_var_names] + - ["{0}' = {0}".format(gtag) for gtag in gtag_var_names] - ) - - # Comma separated dim names, e.g., "lid0', lid1', gid0'" - par_tag_var_names = ", ".join(ltag_var_names + gtag_var_names) - par_tag_var_names_prime = ", ".join( - append_marker_to_strings(ltag_var_names + gtag_var_names, "'")) + # Create string for representing parallel iname condition in sio + conc_inames, _ = partition_inames_by_concurrency(knl) + par_iname_condition = " and ".join( + "{0} = {0}'".format(iname) for iname in conc_inames) # Relationship between stmt_a and stmt_b --------------------------------------- expected_sio = isl.Map( - "[pi, pj, pk] -> {{ " - "[{0}'=0, i', k', {1}] -> [{0}=1, i, j, {2}] : " - "0 <= i' < pi and 0 <= k' < pk and 0 <= j < pj and 0 <= i < pi and i > i' " - "and {3}; " - "[{0}'=0, i', k', {1}] -> [{0}=1, i=i', j, {2}] : " - "0 <= i' < pi and 0 <= k' < pk and 0 <= j < pj " - "and {3}" + "[pi, pj] -> {{ " + "[{0}'=0, i', ii', j', jj'] -> [{0}=1, i, ii, j, jj] : " + "0 <= i,ii,i',ii' < pi and 0 <= j,jj,j',jj' < pj and ii >= ii' " + "and {1} " "}}".format( STATEMENT_VAR_NAME, - par_tag_var_names_prime, - par_tag_var_names, - par_tag_condition, + par_iname_condition, ) ) # isl ignores these apostrophes, so explicitly add them @@ -836,44 +745,7 @@ def test_statement_instance_ordering_with_hw_par_tags(): _check_sio_for_stmt_pair(expected_sio, "stmt_a", "stmt_b", sched_maps) - # Relationship between stmt_a and stmt_d --------------------------------------- - - expected_sio = isl.Map( - "[pt, pi, pk] -> {{ " - "[{0}'=0, i', k', {1}] -> [{0}=1, t, {2}] : " - "0 <= i' < pi and 0 <= k' < pk and 0 <= t < pt and {3}" - "}}".format( - STATEMENT_VAR_NAME, - par_tag_var_names_prime, - par_tag_var_names, - par_tag_condition, - ) - ) - # isl ignores these apostrophes, so explicitly add them - expected_sio = append_marker_to_isl_map_var_names( - expected_sio, isl.dim_type.in_, "'") - - 
_check_sio_for_stmt_pair(expected_sio, "stmt_a", "stmt_d", sched_maps) - - # Relationship between stmt_b and stmt_d --------------------------------------- - - expected_sio = isl.Map( - "[pt, pi, pj] -> {{ " - "[{0}'=0, i', j', {1}] -> [{0}=1, t, {2}] : " - "0 <= i' < pi and 0 <= j' < pj and 0 <= t < pt and {3}" - "}}".format( - STATEMENT_VAR_NAME, - par_tag_var_names_prime, - par_tag_var_names, - par_tag_condition, - ) - ) - # isl ignores these apostrophes, so explicitly add them - expected_sio = append_marker_to_isl_map_var_names( - expected_sio, isl.dim_type.in_, "'") - - _check_sio_for_stmt_pair(expected_sio, "stmt_b", "stmt_d", sched_maps) -''' + # ------------------------------------------------------------------------------ # }}} From bef84a6c2930579e07f8a2162de8c6b783c503aa Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Wed, 3 Mar 2021 21:05:06 -0600 Subject: [PATCH 182/460] make SIO map strings more concise --- test/test_linearization_checker.py | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) diff --git a/test/test_linearization_checker.py b/test/test_linearization_checker.py index a1a8c6909..495a1a3f2 100644 --- a/test/test_linearization_checker.py +++ b/test/test_linearization_checker.py @@ -584,9 +584,7 @@ def test_statement_instance_ordering(): expected_sio = isl.Map( "[pi, pj, pk] -> {{ " "[{0}'=0, i', k'] -> [{0}=1, i, j] : " - "0 <= i' < pi and 0 <= k' < pk and 0 <= j < pj and 0 <= i < pi and i > i'; " - "[{0}'=0, i', k'] -> [{0}=1, i=i', j] : " - "0 <= i' < pi and 0 <= k' < pk and 0 <= j < pj " + "0 <= i,i' < pi and 0 <= k' < pk and 0 <= j < pj and i >= i' " "}}".format(STATEMENT_VAR_NAME) ) # isl ignores these apostrophes, so explicitly add them @@ -600,9 +598,7 @@ def test_statement_instance_ordering(): expected_sio = isl.Map( "[pi, pj, pk] -> {{ " "[{0}'=0, i', k'] -> [{0}=1, i, j] : " - "0 <= i' < pi and 0 <= k' < pk and 0 <= j < pj and 0 <= i < pi and i > i'; " - "[{0}'=0, i', k'] -> [{0}=1, i=i', j] : " - "0 <= i' < pi and 
0 <= k' < pk and 0 <= j < pj " + "0 <= i,i' < pi and 0 <= k' < pk and 0 <= j < pj and i >= i' " "}}".format(STATEMENT_VAR_NAME) ) # isl ignores these apostrophes, so explicitly add them @@ -630,11 +626,9 @@ def test_statement_instance_ordering(): expected_sio = isl.Map( "[pi, pj] -> {{ " "[{0}'=0, i', j'] -> [{0}=1, i, j] : " - "0 <= i' < pi and 0 <= j' < pj and i > i' and 0 <= i < pi and 0 <= j < pj; " + "0 <= i,i' < pi and 0 <= j,j' < pj and i > i'; " "[{0}'=0, i', j'] -> [{0}=1, i=i', j] : " - "0 <= i' < pi and 0 <= j' < pj and j > j' and 0 <= j < pj; " - "[{0}'=0, i', j'] -> [{0}=1, i=i', j=j'] : " - "0 <= i' < pi and 0 <= j' < pj " + "0 <= i' < pi and 0 <= j,j' < pj and j >= j'; " "}}".format(STATEMENT_VAR_NAME) ) # isl ignores these apostrophes, so explicitly add them From e15ddaeb1a5266b1d1b24933df081bb4ff120102 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Wed, 3 Mar 2021 22:01:51 -0600 Subject: [PATCH 183/460] update doctest --- loopy/schedule/checker/__init__.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/loopy/schedule/checker/__init__.py b/loopy/schedule/checker/__init__.py index f9e9933c6..0dfa02d34 100644 --- a/loopy/schedule/checker/__init__.py +++ b/loopy/schedule/checker/__init__.py @@ -81,13 +81,13 @@ def get_schedules_for_statement_pairs( >>> # Print maps >>> print("\n".join( ... str(m).replace("{ ", "{\n").replace(" :", "\n:") - ... for m in schedules[("insn_a", "insn_b")] + ... for m in schedules[("insn_a", "insn_b")[0]] ... 
)) [pi, pj, pk] -> { - [_lp_linchk_stmt = 0, i, j, k] -> [_lp_linchk_l0 = i, _lp_linchk_l1 = 0] + [_lp_linchk_stmt = 0, i, j, k] -> [_lp_linchk_lex0 = i, _lp_linchk_lex1 = 0] : 0 <= i < pi and 0 <= j < pj and 0 <= k < pk } [pi, pj, pk] -> { - [_lp_linchk_stmt = 1, i, j, k] -> [_lp_linchk_l0 = i, _lp_linchk_l1 = 1] + [_lp_linchk_stmt = 1, i, j, k] -> [_lp_linchk_lex0 = i, _lp_linchk_lex1 = 1] : 0 <= i < pi and 0 <= j < pj and 0 <= k < pk } """ From 6af1b23c79f2e778cf9e15989b0d664d5a407739 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Wed, 3 Mar 2021 22:02:06 -0600 Subject: [PATCH 184/460] remove commented-out code --- loopy/schedule/checker/utils.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/loopy/schedule/checker/utils.py b/loopy/schedule/checker/utils.py index 4c42be861..b5cdb857a 100644 --- a/loopy/schedule/checker/utils.py +++ b/loopy/schedule/checker/utils.py @@ -29,10 +29,7 @@ def prettier_map_string(map_obj): def add_dims_to_isl_set(isl_set, dim_type, names, new_idx_start): - new_set = isl_set.insert_dims( - dim_type, new_idx_start, len(names) - ) - #.set_dim_name(dim_type, new_idx_start, names[0]) + new_set = isl_set.insert_dims(dim_type, new_idx_start, len(names)) for i, name in enumerate(names): new_set = new_set.set_dim_name(dim_type, new_idx_start+i, name) return new_set From 2740c3dac3cf23ec53d276709c2346430ecff73f Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Wed, 3 Mar 2021 22:33:34 -0600 Subject: [PATCH 185/460] fix typo in doctest --- loopy/schedule/checker/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/loopy/schedule/checker/__init__.py b/loopy/schedule/checker/__init__.py index 0dfa02d34..c138271f6 100644 --- a/loopy/schedule/checker/__init__.py +++ b/loopy/schedule/checker/__init__.py @@ -81,7 +81,7 @@ def get_schedules_for_statement_pairs( >>> # Print maps >>> print("\n".join( ... str(m).replace("{ ", "{\n").replace(" :", "\n:") - ... for m in schedules[("insn_a", "insn_b")[0]] + ... 
for m in schedules[("insn_a", "insn_b")][0] ... )) [pi, pj, pk] -> { [_lp_linchk_stmt = 0, i, j, k] -> [_lp_linchk_lex0 = i, _lp_linchk_lex1 = 0] From 5a58c4e516f403db09eae438ec8d875a6eedb3de Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Sun, 21 Mar 2021 14:14:32 -0500 Subject: [PATCH 186/460] Create lex order maps and SIOs in same function as sched creation (rather than returning schedules and lex maps separately and combining them outside function to get SIOs) to avoid passing extra info around. --- loopy/schedule/checker/__init__.py | 1 + loopy/schedule/checker/schedule.py | 24 +++++++++----- test/test_linearization_checker.py | 50 ++++++++++++++++++++---------- 3 files changed, 52 insertions(+), 23 deletions(-) diff --git a/loopy/schedule/checker/__init__.py b/loopy/schedule/checker/__init__.py index c138271f6..2684950d0 100644 --- a/loopy/schedule/checker/__init__.py +++ b/loopy/schedule/checker/__init__.py @@ -91,6 +91,7 @@ def get_schedules_for_statement_pairs( : 0 <= i < pi and 0 <= j < pj and 0 <= k < pk } """ + # TODO update docs now that we're returning SIOs # {{{ make sure kernel has been preprocessed diff --git a/loopy/schedule/checker/schedule.py b/loopy/schedule/checker/schedule.py index c6f5e43e5..a59c579cc 100644 --- a/loopy/schedule/checker/schedule.py +++ b/loopy/schedule/checker/schedule.py @@ -148,7 +148,7 @@ def generate_pairwise_schedules( mappings from statement instances to lexicographic time, one for each of the two statements. 
""" - # TODO update doc + # TODO update docs now that we're returning SIOs from loopy.schedule import (EnterLoop, LeaveLoop, Barrier, RunInstruction) from loopy.kernel.data import (LocalIndexTag, GroupIndexTag) @@ -322,6 +322,7 @@ def _get_map_for_stmt( from loopy.schedule.checker.lexicographic_order_map import ( create_lex_order_map, + get_statement_ordering_map, ) pairwise_schedules = {} @@ -358,16 +359,25 @@ def _get_map_for_stmt( in zip(insn_ids, lex_tuples_simplified, int_sids) ] - # TODO (moved func below up here to avoid passing extra info around) - # Benefit (e.g.): don't want to examine the schedule tuple in separate func - # below to re-determine which parallel - # dims are used. (could simplify everything by always using all dims, which - # would make maps more complex than necessary) + # Create lex order maps and SIOs here (rather than returning schedules + # and lex maps separately and combining them outside function to get + # SIOs) to avoid passing extra info around. Don't want to, e.g., + # examine the schedule tuple in separate func to re-determine which + # parallel dims are used. 
(could simplify everything by always using + # all dims..., which would make maps more complex than necessary) lex_order_map = create_lex_order_map( after_names=seq_lex_dim_names, after_names_concurrent=conc_lex_dim_names, ) - pairwise_schedules[tuple(insn_ids)] = (tuple(sched_maps), lex_order_map) + # Create statement instance ordering, + # maps each statement instance to all statement instances occuring later + sio = get_statement_ordering_map( + *sched_maps, # note, func accepts exactly two maps + lex_order_map, + ) + + #pairwise_schedules[tuple(insn_ids)] = tuple(sched_maps) + pairwise_schedules[tuple(insn_ids)] = (sio, tuple(sched_maps)) return pairwise_schedules diff --git a/test/test_linearization_checker.py b/test/test_linearization_checker.py index 495a1a3f2..6dfb68f68 100644 --- a/test/test_linearization_checker.py +++ b/test/test_linearization_checker.py @@ -138,7 +138,7 @@ def test_pairwise_schedule_creation(): # Relationship between insn_a and insn_b --------------------------------------- # Get two maps - (sched_map_before, sched_map_after), sched_lex_order_map = sched_maps[ + sio, (sched_map_before, sched_map_after) = sched_maps[ ("insn_a", "insn_b")] # Create expected maps and compare @@ -168,7 +168,7 @@ def test_pairwise_schedule_creation(): # Relationship between insn_a and insn_c --------------------------------------- # Get two maps - (sched_map_before, sched_map_after), sched_lex_order_map = sched_maps[ + sio, (sched_map_before, sched_map_after) = sched_maps[ ("insn_a", "insn_c")] # Create expected maps and compare @@ -198,7 +198,7 @@ def test_pairwise_schedule_creation(): # Relationship between insn_a and insn_d --------------------------------------- # Get two maps - (sched_map_before, sched_map_after), sched_lex_order_map = sched_maps[ + sio, (sched_map_before, sched_map_after) = sched_maps[ ("insn_a", "insn_d")] # Create expected maps and compare @@ -228,7 +228,7 @@ def test_pairwise_schedule_creation(): # Relationship between insn_b and 
insn_c --------------------------------------- # Get two maps - (sched_map_before, sched_map_after), sched_lex_order_map = sched_maps[ + sio, (sched_map_before, sched_map_after) = sched_maps[ ("insn_b", "insn_c")] # Create expected maps and compare @@ -258,7 +258,7 @@ def test_pairwise_schedule_creation(): # Relationship between insn_b and insn_d --------------------------------------- # Get two maps - (sched_map_before, sched_map_after), sched_lex_order_map = sched_maps[ + sio, (sched_map_before, sched_map_after) = sched_maps[ ("insn_b", "insn_d")] # Create expected maps and compare @@ -288,7 +288,7 @@ def test_pairwise_schedule_creation(): # Relationship between insn_c and insn_d --------------------------------------- # Get two maps - (sched_map_before, sched_map_after), sched_lex_order_map = sched_maps[ + sio, (sched_map_before, sched_map_after) = sched_maps[ ("insn_c", "insn_d")] # Create expected maps and compare @@ -363,7 +363,7 @@ def test_pairwise_schedule_creation_with_hw_par_tags(): # Relationship between stmt_a and stmt_b --------------------------------------- # Get two maps - (sched_map_before, sched_map_after), sched_lex_order_map = sched_maps[ + sio, (sched_map_before, sched_map_after) = sched_maps[ ("stmt_a", "stmt_b")] # Create expected maps and compare @@ -502,17 +502,9 @@ def _check_sio_for_stmt_pair( ) # Get pairwise schedule - (sched_map_before, sched_map_after), sched_lex_order_map = sched_maps[ + sio, (sched_map_before, sched_map_after) = sched_maps[ (stmt_id_before, stmt_id_after)] - # Create statement instance ordering, - # maps each statement instance to all statement instances occuring later - sio = get_statement_ordering_map( - sched_map_before, - sched_map_after, - sched_lex_order_map, - ) - sio_aligned = ensure_dim_names_match_and_align(sio, expected_sio) assert sio_aligned == expected_sio @@ -741,6 +733,32 @@ def test_statement_instance_ordering_with_hw_par_tags(): # 
------------------------------------------------------------------------------ + +# TODO when testing happens-after-barrier map, make sure to test parameter assumption issues: +""" +>>> test_pair2 = append_marker_to_isl_map_var_names(isl.Map("[p] -> { [stmt' = 0, i'=1, j'=p-1] -> [stmt = 1] : p > 1 }"), isl.dim_type.in_, "'") +>>> test_pair3 = append_marker_to_isl_map_var_names(isl.Map("[p] -> { [stmt' = 0, i'=1, j'=p-1] -> [stmt = 1] : p > 2 }"), isl.dim_type.in_, "'") +>>> hab = append_marker_to_isl_map_var_names(isl.Map("[p] -> { [stmt' = 0, i', j'] -> [stmt = 1] : 0 <= i' < p and 0 <= j' <= -2 + p; [stmt' = 0, i', j' = -1 + p] -> [stmt = 1] : 0 <= i' <= -2 + p }"), isl.dim_type.in_, "'") +>>> print(prettier_map_string(hab)) +[p] -> { +[stmt' = 0, i', j'] -> [stmt = 1] : 0 <= i' < p and 0 <= j' <= -2 + p; +[stmt' = 0, i', j' = -1 + p] -> [stmt = 1] : 0 <= i' <= -2 + p +} +>>> print(prettier_map_string(test_pair2)) +[p] -> { +[stmt' = 0, i' = 1, j' = -1 + p] -> [stmt = 1] : p >= 2 +} +>>> print(prettier_map_string(test_pair3)) +[p] -> { +[stmt' = 0, i' = 1, j' = -1 + p] -> [stmt = 1] : p >= 3 +} +>>> test_pair2.is_subset(hab) +False +>>> test_pair3.is_subset(hab) +True +""" + + # }}} From c2e83e1c239bb990a62e2a6e72c2a42690f2c2b1 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Sun, 21 Mar 2021 19:57:16 -0500 Subject: [PATCH 187/460] start incorporating the bulk of the new blex order/map functionality; still WIP and needs cleanup/tests --- .../checker/lexicographic_order_map.py | 10 +- loopy/schedule/checker/schedule.py | 356 ++++++++++++++++-- loopy/schedule/checker/utils.py | 2 + test/test_linearization_checker.py | 147 ++++---- 4 files changed, 408 insertions(+), 107 deletions(-) diff --git a/loopy/schedule/checker/lexicographic_order_map.py b/loopy/schedule/checker/lexicographic_order_map.py index fb912cb7b..9add041c4 100644 --- a/loopy/schedule/checker/lexicographic_order_map.py +++ b/loopy/schedule/checker/lexicographic_order_map.py @@ -76,6 +76,7 @@ def 
get_lex_order_set( before_names_concurrent=[], after_names_concurrent=[], islvars=None, + conc_var_comparison_op="eq", ): """Return an :class:`islpy.Set` representing a lexicographic ordering with the number of dimensions provided in `before_names` @@ -154,7 +155,7 @@ def get_lex_order_set( lex_order_set = lex_order_set & \ create_elementwise_comparison_conjunction_set( before_names_concurrent, after_names_concurrent, - islvars, op="eq", + islvars, op=conc_var_comparison_op, ) return lex_order_set @@ -165,6 +166,8 @@ def create_lex_order_map( before_names=None, after_names=None, after_names_concurrent=[], + conc_var_comparison_op="eq", + in_dim_marker="'", ): """Return a map from each point in a lexicographic ordering to every point that occurs later in the lexicographic ordering. @@ -201,11 +204,11 @@ def create_lex_order_map( if after_names is None: after_names = ["i%s" % (i) for i in range(n_dims)] if before_names is None: - before_names = append_marker_to_strings(after_names, marker="'") + before_names = append_marker_to_strings(after_names, marker=in_dim_marker) if n_dims is None: n_dims = len(after_names) before_names_concurrent = append_marker_to_strings( - after_names_concurrent, marker="'") + after_names_concurrent, marker=in_dim_marker) assert len(before_names) == len(after_names) == n_dims dim_type = isl.dim_type @@ -214,6 +217,7 @@ def create_lex_order_map( lex_order_set = get_lex_order_set( before_names, after_names, before_names_concurrent, after_names_concurrent, + conc_var_comparison_op=conc_var_comparison_op, ) # Now convert that set to a map. 
diff --git a/loopy/schedule/checker/schedule.py b/loopy/schedule/checker/schedule.py index a59c579cc..ec0efb9d8 100644 --- a/loopy/schedule/checker/schedule.py +++ b/loopy/schedule/checker/schedule.py @@ -21,6 +21,7 @@ """ import islpy as isl +dt = isl.dim_type.set __doc__ = """ @@ -44,13 +45,21 @@ LIN_CHECK_IDENTIFIER_PREFIX = "_lp_linchk_" LEX_VAR_PREFIX = "%slex" % (LIN_CHECK_IDENTIFIER_PREFIX) +BLEX_VAR_PREFIX = "%sblex" % (LIN_CHECK_IDENTIFIER_PREFIX) STATEMENT_VAR_NAME = "%sstmt" % (LIN_CHECK_IDENTIFIER_PREFIX) -# TODO document: +BEFORE_MARK = "'" GTAG_VAR_NAMES = [] LTAG_VAR_NAMES = [] for par_level in [0, 1, 2]: GTAG_VAR_NAMES.append("%sgid%d" % (LIN_CHECK_IDENTIFIER_PREFIX, par_level)) LTAG_VAR_NAMES.append("%slid%d" % (LIN_CHECK_IDENTIFIER_PREFIX, par_level)) +PRE = "pre" +FIRST = "first" +TOP = "top" +BOTTOM = "bottom" +LAST = "last" +POST = "post" +# TODO document new vars def _pad_tuple_with_zeros(tup, desired_length): @@ -152,6 +161,10 @@ def generate_pairwise_schedules( from loopy.schedule import (EnterLoop, LeaveLoop, Barrier, RunInstruction) from loopy.kernel.data import (LocalIndexTag, GroupIndexTag) + from loopy.schedule.checker.lexicographic_order_map import ( + create_lex_order_map, + get_statement_ordering_map, + ) all_insn_ids = set().union(*insn_id_pairs) @@ -162,7 +175,7 @@ def generate_pairwise_schedules( # For each statement, map the insn_id to a tuple representing points # in the lexicographic ordering containing items of :class:`int` or # :class:`str` :mod:`loopy` inames. 
- stmt_instances = {} + stmt_inst_to_lex = {} # Keep track of the next tuple of points in our lexicographic # ordering, initially this as a 1-d point with value 0 @@ -224,8 +237,8 @@ def generate_pairwise_schedules( # Only process listed insns, otherwise ignore if lp_insn_id in all_insn_ids: - # Add item to stmt_instances - stmt_instances[lp_insn_id] = tuple(next_insn_lex_tuple) + # Add item to stmt_inst_to_lex + stmt_inst_to_lex[lp_insn_id] = tuple(next_insn_lex_tuple) # Increment lex dim val enumerating items in current section of code next_insn_lex_tuple[-1] += 1 @@ -238,11 +251,10 @@ def generate_pairwise_schedules( pass # To save time, stop when we've found all statements - if len(stmt_instances.keys()) == len(all_insn_ids): + if len(stmt_inst_to_lex.keys()) == len(all_insn_ids): + # TODO if combining blex map creation with this pass, cannot stop early break - # Second, create pairwise schedules for each individual pair of insns - # Get dim names representing local/group axes for this kernel, # and get the dictionary that will be used later to create a # constraint requiring {par inames == par axes} in sched @@ -268,6 +280,243 @@ def generate_pairwise_schedules( continue conc_lex_dim_names = sorted(l_axes_used) + sorted(g_axes_used) + # {{{ Create blex ordering (may later be combined with pass above) + + # {{{ Determine which loops contain barriers + + loops_with_barriers = set() + current_inames = set() + + for linearization_item in linearization_items: + if isinstance(linearization_item, EnterLoop): + current_inames.add(linearization_item.iname) + elif isinstance(linearization_item, LeaveLoop): + current_inames.remove(linearization_item.iname) + elif isinstance(linearization_item, Barrier): + loops_with_barriers |= current_inames + # At this point we could technically skip ahead to next enterloop + + # }}} + + # {{{ Get upper and lower bound for each loop that contains a barrier + # (Could try to combine this with pass below but would make things messy) + + 
iname_bounds_pwaff = {} + blex_map_params = set() + + for iname in loops_with_barriers: + # Get first and last vals for this iname + bounds = knl.get_iname_bounds(iname) + ubound = bounds.upper_bound_pw_aff + lbound = bounds.lower_bound_pw_aff + iname_bounds_pwaff[iname] = (lbound, ubound) + blex_map_params |= set( + lbound.get_var_names(dt.param) + ubound.get_var_names(dt.param)) + + blex_map_params = sorted(blex_map_params) + + # }}} + + # {{{ Construct blueprint for creating blex space and orderings + # TODO combine this pass over the linearization items with the pass above + + stmt_inst_to_blex = {} + subtract_map_blueprint = {} + + # Keep track of the next tuple of points in our blexicographic + # ordering, initially this as a 1-d point with value 0 + next_blex_pt = [0] + n_blex_dims = 1 + iname_to_blexdim = {} + + for linearization_item in linearization_items: + if isinstance(linearization_item, EnterLoop): + enter_iname = linearization_item.iname + if enter_iname in loops_with_barriers: + # update next blex pt + pre_loop_blex_pt = next_blex_pt[:] + next_blex_pt[-1] += 1 + next_blex_pt.append(enter_iname) + next_blex_pt.append(0) + + # store tuples that will be used to create pairs + # that will later be subtracted from happens-before map + first_iter_blex_pt = next_blex_pt[:] + first_iter_blex_pt[-2] = iname_bounds_pwaff[enter_iname][0] + subtract_map_blueprint[enter_iname] = { + PRE: tuple(pre_loop_blex_pt), # make sure to copy + TOP: tuple(next_blex_pt), # make sure to copy + FIRST: tuple(first_iter_blex_pt), # make sure to copy + } + + elif isinstance(linearization_item, LeaveLoop): + leave_iname = linearization_item.iname + if leave_iname in loops_with_barriers: + # update max blex dims + n_blex_dims = max(n_blex_dims, len(next_blex_pt)) + iname_to_blexdim[leave_iname] = len(next_blex_pt)-2 + + # update next blex pt + pre_end_loop_blex_pt = next_blex_pt[:] + next_blex_pt.pop() + next_blex_pt.pop() + next_blex_pt[-1] += 1 + + # store tuples that will be 
used to create pairs + # that will later be subtracted from happens-before map + last_iter_blex_pt = pre_end_loop_blex_pt[:] + last_iter_blex_pt[-2] = iname_bounds_pwaff[leave_iname][1] + subtract_map_blueprint[leave_iname][BOTTOM] = tuple( + pre_end_loop_blex_pt) + subtract_map_blueprint[leave_iname][LAST] = tuple(last_iter_blex_pt) + subtract_map_blueprint[leave_iname][POST] = tuple(next_blex_pt) + # (make sure ^these are copies) + + elif isinstance(linearization_item, RunInstruction): + # Add item to stmt_inst_to_blex + lp_insn_id = linearization_item.insn_id + stmt_inst_to_blex[lp_insn_id] = tuple(next_blex_pt) + + # Don't increment blex dim val + + elif isinstance(linearization_item, Barrier): + + next_blex_pt[-1] += 1 + + else: + from loopy.schedule import (CallKernel, ReturnFromKernel) + # No action needed for these types of linearization item + assert isinstance( + linearization_item, (CallKernel, ReturnFromKernel)) + pass + + # }}} + + # pad tuples w/zeros + for stmt, tup in stmt_inst_to_blex.items(): + stmt_inst_to_blex[stmt] = _pad_tuple_with_zeros(tup, n_blex_dims) + + # Create names for the blex dimensions for sequential loops + from loopy.schedule.checker.utils import ( + append_marker_to_strings, + ) + seq_blex_dim_names = [ + BLEX_VAR_PREFIX+str(i) for i in range(n_blex_dims)] + seq_blex_dim_names_prime = append_marker_to_strings( + seq_blex_dim_names, marker=BEFORE_MARK) + + blex_order_map = create_lex_order_map( + before_names=seq_blex_dim_names_prime, + after_names=seq_blex_dim_names, + after_names_concurrent=conc_lex_dim_names, + conc_var_comparison_op="ne", + in_dim_marker=BEFORE_MARK, + ) + + iname_to_blexvar = {} + for iname, dim in iname_to_blexdim.items(): + iname_to_blexvar[iname] = seq_blex_dim_names[dim] + iname_to_blexvar[iname+BEFORE_MARK] = seq_blex_dim_names_prime[dim] + + # Add params to blex map + blex_order_map = blex_order_map.add_dims(dt.param, len(blex_map_params)) + for i, p in enumerate(blex_map_params): + blex_order_map = 
blex_order_map.set_dim_name(dt.param, i, p) + + # get a set representing blex_order_map space + blex_set_template = isl.align_spaces( + isl.Map("[ ] -> { [ ] -> [ ] }"), blex_order_map + ).move_dims( + dt.in_, n_blex_dims, dt.out, 0, n_blex_dims + ).domain() + blex_set_affs = isl.affs_from_space(blex_set_template.space) + + def _create_subtraction_map_for_iname(iname, blueprint): + # Note: blueprint[FIRST] and blueprint[LAST] contain pwaffs + + def _create_blex_set_from_tuple_pair(before, after, wrap_cond=False): + + # start with a set representing blex_order_map space + blex_set = blex_set_template.copy() + + # add markers to inames in before tuple + # (assume strings are the inames) + before_prime = tuple( + v+BEFORE_MARK if isinstance(v, str) else v for v in before) + before_padded = _pad_tuple_with_zeros(before_prime, n_blex_dims) + after_padded = _pad_tuple_with_zeros(after, n_blex_dims) + + # assign vals to dims + for dim_name, dim_val in zip( + seq_blex_dim_names_prime+seq_blex_dim_names, + before_padded+after_padded): + # (could exploit knowledge of content types of odd/even + # tuple dims to reduce conditionals but would be ugly + # and less robust) + if isinstance(dim_val, int): + # set idx to int val + blex_set &= blex_set_affs[dim_name].eq_set( + blex_set_affs[0]+dim_val) + elif isinstance(dim_val, str): + # assume this is an iname, set idx to corresponding blex var + blex_set &= blex_set_affs[dim_name].eq_set( + blex_set_affs[iname_to_blexvar[dim_val]]) + else: + assert isinstance(dim_val, isl.PwAff) + pwaff_aligned = isl.align_spaces(dim_val, blex_set_affs[0]) + # (doesn't matter which element of blex_set_affs we use^) + blex_set &= blex_set_affs[dim_name].eq_set(pwaff_aligned) + + if wrap_cond: + # i = i' + step + # TODO what about step sizes != 1? 
+ blex_set &= blex_set_affs[iname_to_blexvar[iname]].eq_set( + blex_set_affs[iname_to_blexvar[iname+BEFORE_MARK]] + 1) + + return blex_set + + # enter loop case + full_blex_set = _create_blex_set_from_tuple_pair( + blueprint[PRE], blueprint[FIRST]) + # wrap loop case + full_blex_set |= _create_blex_set_from_tuple_pair( + blueprint[BOTTOM], blueprint[TOP], wrap_cond=True) + # leave loop case + full_blex_set |= _create_blex_set_from_tuple_pair( + blueprint[LAST], blueprint[POST]) + + # add cond to fix iteration value for surrounding loops (i = i') + for surrounding_iname in blueprint[PRE][1::2]: + s_blex_var = iname_to_blexvar[surrounding_iname] + full_blex_set &= blex_set_affs[s_blex_var].eq_set( + blex_set_affs[s_blex_var+BEFORE_MARK]) + + # convert blex set back to map + return isl.Map.from_domain(full_blex_set).move_dims( + dt.out, 0, dt.in_, n_blex_dims, n_blex_dims) + + # subtract unwanted pairs from happens-before blex map + maps_to_subtract = [] + for iname, subdict in subtract_map_blueprint.items(): + maps_to_subtract.append(_create_subtraction_map_for_iname(iname, subdict)) + + if maps_to_subtract: + # get union of maps + map_to_subtract = maps_to_subtract[0] + for other_map in maps_to_subtract[1:]: + map_to_subtract |= other_map + + # get some closure + map_to_subtract, closure_exact = map_to_subtract.transitive_closure() + assert closure_exact # TODO warn instead + + # subtract from blex order map + blex_order_map = blex_order_map - map_to_subtract + + # }}} end blex order/map machinery + + # Second, create pairwise schedules for each individual pair of insns + from loopy.schedule.checker.utils import ( sorted_union_of_names_in_isl_sets, create_symbolic_map_from_tuples, @@ -275,7 +524,7 @@ def generate_pairwise_schedules( ) def _get_map_for_stmt( - insn_id, lex_points, int_sid, seq_lex_dim_names, conc_lex_dim_names): + insn_id, lex_points, int_sid, lex_dim_names): # Get inames domain for statement instance (a BasicSet) dom = knl.get_inames_domain( @@ 
-290,14 +539,14 @@ def _get_map_for_stmt( sched_space = isl.Space.create_from_names( isl.DEFAULT_CONTEXT, in_=in_names_sched, - out=seq_lex_dim_names+conc_lex_dim_names, + out=lex_dim_names, params=[], ) # Insert 'statement' dim into domain so that its space allows # for intersection with sched map later dom_to_intersect = add_dims_to_isl_set( - dom, isl.dim_type.set, [STATEMENT_VAR_NAME], 0) + dom, dt.set, [STATEMENT_VAR_NAME], 0) # Each map will map statement instances -> lex time. # Right now, statement instance tuples consist of single int. @@ -320,17 +569,19 @@ def _get_map_for_stmt( return sched_map - from loopy.schedule.checker.lexicographic_order_map import ( - create_lex_order_map, - get_statement_ordering_map, - ) - pairwise_schedules = {} for insn_ids in insn_id_pairs: - lex_tuples = [stmt_instances[insn_id] for insn_id in insn_ids] + # Determine integer IDs that will represent each statement in mapping + # (dependency map creation assumes sid_before=0 and sid_after=1, unless + # before and after refer to same stmt, in which case sid_before=sid_after=0) + int_sids = [0, 0] if insn_ids[0] == insn_ids[1] else [0, 1] + + # {{{ Create SIO for intra-thread case (lid0' == lid0, etc) # Simplify tuples to the extent possible ------------------------------------ + lex_tuples = [stmt_inst_to_lex[insn_id] for insn_id in insn_ids] + # At this point, one of the lex tuples may have more dimensions than another; # the missing dims are the fastest-updating dims, and their values should # be zero. Add them. 
@@ -339,22 +590,18 @@ def _get_map_for_stmt( _pad_tuple_with_zeros(lex_tuple, max_lex_dims) for lex_tuple in lex_tuples] - lex_tuples_simplified = _simplify_lex_dims(*lex_tuples_padded) - # Now generate maps from the blueprint -------------------------------------- + lex_tuples_simplified = _simplify_lex_dims(*lex_tuples_padded) + # Create names for the output dimensions for sequential loops seq_lex_dim_names = [ LEX_VAR_PREFIX+str(i) for i in range(len(lex_tuples_simplified[0]))] - # Determine integer IDs that will represent each statement in mapping - # (dependency map creation assumes sid_before=0 and sid_after=1, unless - # before and after refer to same stmt, in which case sid_before=sid_after=0) - int_sids = [0, 0] if insn_ids[0] == insn_ids[1] else [0, 1] - - sched_maps = [ + intra_thread_sched_maps = [ _get_map_for_stmt( - insn_id, lex_tuple, int_sid, seq_lex_dim_names, conc_lex_dim_names) + insn_id, lex_tuple, int_sid, + seq_lex_dim_names+conc_lex_dim_names) for insn_id, lex_tuple, int_sid in zip(insn_ids, lex_tuples_simplified, int_sids) ] @@ -368,16 +615,67 @@ def _get_map_for_stmt( lex_order_map = create_lex_order_map( after_names=seq_lex_dim_names, after_names_concurrent=conc_lex_dim_names, + conc_var_comparison_op="eq", + in_dim_marker=BEFORE_MARK, ) # Create statement instance ordering, # maps each statement instance to all statement instances occuring later - sio = get_statement_ordering_map( - *sched_maps, # note, func accepts exactly two maps + sio_seq = get_statement_ordering_map( + *intra_thread_sched_maps, # note, func accepts exactly two maps lex_order_map, + before_marker=BEFORE_MARK, + ) + + # }}} + + # {{{ Create SIOs for inter-thread cases (lid0' != lid0, etc) + + # TODO finish separating lid stuff from gid stuff + + # NOTE: use *unsimplified* lex tuples with blex map + + blex_tuples = [stmt_inst_to_blex[insn_id] for insn_id in insn_ids] + + # At this point, one of the lex tuples may have more dimensions than another; + # the missing dims 
are the fastest-updating dims, and their values should + # be zero. Add them. + max_blex_dims = max([len(blex_tuple) for blex_tuple in blex_tuples]) + blex_tuples_padded = [ + _pad_tuple_with_zeros(blex_tuple, max_blex_dims) + for blex_tuple in blex_tuples] + + # Create names for the output dimensions for sequential loops + seq_blex_dim_names = [ + BLEX_VAR_PREFIX+str(i) for i in range(len(blex_tuples_padded[0]))] + + lconc_sched_maps = [ + _get_map_for_stmt( + insn_id, blex_tuple, int_sid, + seq_blex_dim_names+conc_lex_dim_names) # conc dim names same for all + for insn_id, blex_tuple, int_sid + in zip(insn_ids, blex_tuples_padded, int_sids) + ] + + # Create statement instance ordering + sio_lconc = get_statement_ordering_map( + *lconc_sched_maps, # note, func accepts exactly two maps + blex_order_map, + before_marker=BEFORE_MARK, ) - #pairwise_schedules[tuple(insn_ids)] = tuple(sched_maps) - pairwise_schedules[tuple(insn_ids)] = (sio, tuple(sched_maps)) + # Create statement instance ordering + # TODO + #sio_gconc = get_statement_ordering_map( + # *gconc_sched_maps, # note, func accepts exactly two maps + # g_blex_order_map, + # before_marker=BEFORE_MARK, + # ) + + # }}} + + #pairwise_schedules[tuple(insn_ids)] = tuple(intra_thread_sched_maps) + pairwise_schedules[tuple(insn_ids)] = ( + sio_seq, sio_lconc, tuple(intra_thread_sched_maps)) return pairwise_schedules diff --git a/loopy/schedule/checker/utils.py b/loopy/schedule/checker/utils.py index b5cdb857a..39c7f48e0 100644 --- a/loopy/schedule/checker/utils.py +++ b/loopy/schedule/checker/utils.py @@ -277,6 +277,8 @@ def create_elementwise_comparison_conjunction_set( for n0, n1 in zip(names0, names1): if op == "eq": conj_set = conj_set & islvars[n0].eq_set(islvars[n1]) + elif op == "ne": + conj_set = conj_set & islvars[n0].ne_set(islvars[n1]) elif op == "lt": conj_set = conj_set & islvars[n0].lt_set(islvars[n1]) diff --git a/test/test_linearization_checker.py b/test/test_linearization_checker.py index 
6dfb68f68..caaa7bb43 100644 --- a/test/test_linearization_checker.py +++ b/test/test_linearization_checker.py @@ -138,12 +138,12 @@ def test_pairwise_schedule_creation(): # Relationship between insn_a and insn_b --------------------------------------- # Get two maps - sio, (sched_map_before, sched_map_after) = sched_maps[ + sio_seq, sio_lconc, (sched_map_before, sched_map_after) = sched_maps[ ("insn_a", "insn_b")] # Create expected maps and compare - sched_map_before_expected = isl.Map( + sched_map_before_exp = isl.Map( "[pi, pk] -> { [%s=0, i, k] -> [%s] : 0 <= i < pi and 0 <= k < pk }" % ( STATEMENT_VAR_NAME, @@ -151,7 +151,7 @@ def test_pairwise_schedule_creation(): ) ) - sched_map_after_expected = isl.Map( + sched_map_after_exp = isl.Map( "[pi, pj] -> { [%s=1, i, j] -> [%s] : 0 <= i < pi and 0 <= j < pj }" % ( STATEMENT_VAR_NAME, @@ -160,7 +160,7 @@ def test_pairwise_schedule_creation(): ) _align_and_compare_maps( - [sched_map_before_expected, sched_map_after_expected], + [sched_map_before_exp, sched_map_after_exp], [sched_map_before, sched_map_after], ) @@ -168,12 +168,12 @@ def test_pairwise_schedule_creation(): # Relationship between insn_a and insn_c --------------------------------------- # Get two maps - sio, (sched_map_before, sched_map_after) = sched_maps[ + sio_seq, sio_lconc, (sched_map_before, sched_map_after) = sched_maps[ ("insn_a", "insn_c")] # Create expected maps and compare - sched_map_before_expected = isl.Map( + sched_map_before_exp = isl.Map( "[pi, pk] -> { [%s=0, i, k] -> [%s] : 0 <= i < pi and 0 <= k < pk }" % ( STATEMENT_VAR_NAME, @@ -181,7 +181,7 @@ def test_pairwise_schedule_creation(): ) ) - sched_map_after_expected = isl.Map( + sched_map_after_exp = isl.Map( "[pi, pj] -> { [%s=1, i, j] -> [%s] : 0 <= i < pi and 0 <= j < pj }" % ( STATEMENT_VAR_NAME, @@ -190,7 +190,7 @@ def test_pairwise_schedule_creation(): ) _align_and_compare_maps( - [sched_map_before_expected, sched_map_after_expected], + [sched_map_before_exp, 
sched_map_after_exp], [sched_map_before, sched_map_after], ) @@ -198,12 +198,12 @@ def test_pairwise_schedule_creation(): # Relationship between insn_a and insn_d --------------------------------------- # Get two maps - sio, (sched_map_before, sched_map_after) = sched_maps[ + sio_seq, sio_lconc, (sched_map_before, sched_map_after) = sched_maps[ ("insn_a", "insn_d")] # Create expected maps and compare - sched_map_before_expected = isl.Map( + sched_map_before_exp = isl.Map( "[pi, pk] -> { [%s=0, i, k] -> [%s] : 0 <= i < pi and 0 <= k < pk }" % ( STATEMENT_VAR_NAME, @@ -211,7 +211,7 @@ def test_pairwise_schedule_creation(): ) ) - sched_map_after_expected = isl.Map( + sched_map_after_exp = isl.Map( "[pt] -> { [%s=1, t] -> [%s] : 0 <= t < pt }" % ( STATEMENT_VAR_NAME, @@ -220,7 +220,7 @@ def test_pairwise_schedule_creation(): ) _align_and_compare_maps( - [sched_map_before_expected, sched_map_after_expected], + [sched_map_before_exp, sched_map_after_exp], [sched_map_before, sched_map_after], ) @@ -228,12 +228,12 @@ def test_pairwise_schedule_creation(): # Relationship between insn_b and insn_c --------------------------------------- # Get two maps - sio, (sched_map_before, sched_map_after) = sched_maps[ + sio_seq, sio_lconc, (sched_map_before, sched_map_after) = sched_maps[ ("insn_b", "insn_c")] # Create expected maps and compare - sched_map_before_expected = isl.Map( + sched_map_before_exp = isl.Map( "[pi, pj] -> { [%s=0, i, j] -> [%s] : 0 <= i < pi and 0 <= j < pj }" % ( STATEMENT_VAR_NAME, @@ -241,7 +241,7 @@ def test_pairwise_schedule_creation(): ) ) - sched_map_after_expected = isl.Map( + sched_map_after_exp = isl.Map( "[pi, pj] -> { [%s=1, i, j] -> [%s] : 0 <= i < pi and 0 <= j < pj }" % ( STATEMENT_VAR_NAME, @@ -250,7 +250,7 @@ def test_pairwise_schedule_creation(): ) _align_and_compare_maps( - [sched_map_before_expected, sched_map_after_expected], + [sched_map_before_exp, sched_map_after_exp], [sched_map_before, sched_map_after], ) @@ -258,12 +258,12 @@ def 
test_pairwise_schedule_creation(): # Relationship between insn_b and insn_d --------------------------------------- # Get two maps - sio, (sched_map_before, sched_map_after) = sched_maps[ + sio_seq, sio_lconc, (sched_map_before, sched_map_after) = sched_maps[ ("insn_b", "insn_d")] # Create expected maps and compare - sched_map_before_expected = isl.Map( + sched_map_before_exp = isl.Map( "[pi, pj] -> { [%s=0, i, j] -> [%s] : 0 <= i < pi and 0 <= j < pj }" % ( STATEMENT_VAR_NAME, @@ -271,7 +271,7 @@ def test_pairwise_schedule_creation(): ) ) - sched_map_after_expected = isl.Map( + sched_map_after_exp = isl.Map( "[pt] -> { [%s=1, t] -> [%s] : 0 <= t < pt }" % ( STATEMENT_VAR_NAME, @@ -280,7 +280,7 @@ def test_pairwise_schedule_creation(): ) _align_and_compare_maps( - [sched_map_before_expected, sched_map_after_expected], + [sched_map_before_exp, sched_map_after_exp], [sched_map_before, sched_map_after], ) @@ -288,12 +288,12 @@ def test_pairwise_schedule_creation(): # Relationship between insn_c and insn_d --------------------------------------- # Get two maps - sio, (sched_map_before, sched_map_after) = sched_maps[ + sio_seq, sio_lconc, (sched_map_before, sched_map_after) = sched_maps[ ("insn_c", "insn_d")] # Create expected maps and compare - sched_map_before_expected = isl.Map( + sched_map_before_exp = isl.Map( "[pi, pj] -> { [%s=0, i, j] -> [%s] : 0 <= i < pi and 0 <= j < pj }" % ( STATEMENT_VAR_NAME, @@ -301,7 +301,7 @@ def test_pairwise_schedule_creation(): ) ) - sched_map_after_expected = isl.Map( + sched_map_after_exp = isl.Map( "[pt] -> { [%s=1, t] -> [%s] : 0 <= t < pt }" % ( STATEMENT_VAR_NAME, @@ -310,7 +310,7 @@ def test_pairwise_schedule_creation(): ) _align_and_compare_maps( - [sched_map_before_expected, sched_map_after_expected], + [sched_map_before_exp, sched_map_after_exp], [sched_map_before, sched_map_after], ) @@ -363,12 +363,12 @@ def test_pairwise_schedule_creation_with_hw_par_tags(): # Relationship between stmt_a and stmt_b 
--------------------------------------- # Get two maps - sio, (sched_map_before, sched_map_after) = sched_maps[ + sio_seq, sio_lconc, (sched_map_before, sched_map_after) = sched_maps[ ("stmt_a", "stmt_b")] # Create expected maps and compare - sched_map_before_expected = isl.Map( + sched_map_before_exp = isl.Map( "[pi,pj] -> {[%s=0,i,ii,j,jj] -> [%s] : 0 <= i,ii < pi and 0 <= j,jj < pj}" % ( STATEMENT_VAR_NAME, @@ -376,7 +376,7 @@ def test_pairwise_schedule_creation_with_hw_par_tags(): ) ) - sched_map_after_expected = isl.Map( + sched_map_after_exp = isl.Map( "[pi,pj] -> {[%s=1,i,ii,j,jj] -> [%s] : 0 <= i,ii < pi and 0 <= j,jj < pj}" % ( STATEMENT_VAR_NAME, @@ -385,7 +385,7 @@ def test_pairwise_schedule_creation_with_hw_par_tags(): ) _align_and_compare_maps( - [sched_map_before_expected, sched_map_after_expected], + [sched_map_before_exp, sched_map_after_exp], [sched_map_before, sched_map_after], ) @@ -406,11 +406,11 @@ def test_lex_order_map_creation(): ) def _check_lex_map( - expected_lex_order_map, n_dims, lid_axes_used=[], gid_axes_used=[]): + exp_lex_order_map, n_dims, lid_axes_used=[], gid_axes_used=[]): # Isl ignores the apostrophes, so explicitly add them - expected_lex_order_map = append_marker_to_isl_map_var_names( - expected_lex_order_map, isl.dim_type.in_, "'") + exp_lex_order_map = append_marker_to_isl_map_var_names( + exp_lex_order_map, isl.dim_type.in_, "'") lex_order_map = create_lex_order_map( n_dims=n_dims, @@ -421,15 +421,15 @@ def _check_lex_map( GTAG_VAR_NAMES[i] for i in gid_axes_used], ) - assert lex_order_map == expected_lex_order_map + assert lex_order_map == exp_lex_order_map assert ( lex_order_map.get_var_names(isl.dim_type.in_) == - expected_lex_order_map.get_var_names(isl.dim_type.in_)) + exp_lex_order_map.get_var_names(isl.dim_type.in_)) assert ( lex_order_map.get_var_names(isl.dim_type.out) == - expected_lex_order_map.get_var_names(isl.dim_type.out)) + exp_lex_order_map.get_var_names(isl.dim_type.out)) - expected_lex_order_map = 
isl.Map( + exp_lex_order_map = isl.Map( "{{ " "[{0}0', {0}1', {0}2', {0}3', {0}4'] -> [{0}0, {0}1, {0}2, {0}3, {0}4] :" "(" @@ -445,9 +445,9 @@ def _check_lex_map( ")" "}}".format(LEX_VAR_PREFIX)) - _check_lex_map(expected_lex_order_map, 5) + _check_lex_map(exp_lex_order_map, 5) - expected_lex_order_map = isl.Map( + exp_lex_order_map = isl.Map( "{{ " "[{0}0'] -> [{0}0] :" "(" @@ -455,7 +455,7 @@ def _check_lex_map( ")" "}}".format(LEX_VAR_PREFIX)) - _check_lex_map(expected_lex_order_map, 1) + _check_lex_map(exp_lex_order_map, 1) # Lex map for kernel with parallel HW tags @@ -464,7 +464,7 @@ def _check_lex_map( hw_par_lex_vars = [ LTAG_VAR_NAMES[i] for i in lid_axes_used] + [ GTAG_VAR_NAMES[i] for i in gid_axes_used] - expected_lex_order_map = isl.Map( + exp_lex_order_map = isl.Map( "{{ " "[{0}0', {0}1', {0}2', {1}', {2}', {3}', {4}', {5}'] " "-> [{0}0, {0}1, {0}2, {1}, {2}, {3}, {4}, {5}] :" @@ -480,7 +480,7 @@ def _check_lex_map( "}}".format(LEX_VAR_PREFIX, *hw_par_lex_vars)) _check_lex_map( - expected_lex_order_map, 3, + exp_lex_order_map, 3, lid_axes_used=lid_axes_used, gid_axes_used=gid_axes_used) # }}} @@ -489,25 +489,22 @@ def _check_lex_map( # {{{ test statement instance ordering creation def _check_sio_for_stmt_pair( - expected_sio, + exp_sio, stmt_id_before, stmt_id_after, sched_maps, ): - from loopy.schedule.checker.lexicographic_order_map import ( - get_statement_ordering_map, - ) from loopy.schedule.checker.utils import ( ensure_dim_names_match_and_align, ) # Get pairwise schedule - sio, (sched_map_before, sched_map_after) = sched_maps[ + sio_seq, sio_lconc, (sched_map_before, sched_map_after) = sched_maps[ (stmt_id_before, stmt_id_after)] - sio_aligned = ensure_dim_names_match_and_align(sio, expected_sio) + sio_seq_aligned = ensure_dim_names_match_and_align(sio_seq, exp_sio) - assert sio_aligned == expected_sio + assert sio_seq_aligned == exp_sio def test_statement_instance_ordering(): @@ -573,49 +570,49 @@ def test_statement_instance_ordering(): # 
Relationship between stmt_a and stmt_b --------------------------------------- - expected_sio = isl.Map( + exp_sio_seq = isl.Map( "[pi, pj, pk] -> {{ " "[{0}'=0, i', k'] -> [{0}=1, i, j] : " "0 <= i,i' < pi and 0 <= k' < pk and 0 <= j < pj and i >= i' " "}}".format(STATEMENT_VAR_NAME) ) # isl ignores these apostrophes, so explicitly add them - expected_sio = append_marker_to_isl_map_var_names( - expected_sio, isl.dim_type.in_, "'") + exp_sio_seq = append_marker_to_isl_map_var_names( + exp_sio_seq, isl.dim_type.in_, "'") - _check_sio_for_stmt_pair(expected_sio, "stmt_a", "stmt_b", sched_maps) + _check_sio_for_stmt_pair(exp_sio_seq, "stmt_a", "stmt_b", sched_maps) # Relationship between stmt_a and stmt_c --------------------------------------- - expected_sio = isl.Map( + exp_sio_seq = isl.Map( "[pi, pj, pk] -> {{ " "[{0}'=0, i', k'] -> [{0}=1, i, j] : " "0 <= i,i' < pi and 0 <= k' < pk and 0 <= j < pj and i >= i' " "}}".format(STATEMENT_VAR_NAME) ) # isl ignores these apostrophes, so explicitly add them - expected_sio = append_marker_to_isl_map_var_names( - expected_sio, isl.dim_type.in_, "'") + exp_sio_seq = append_marker_to_isl_map_var_names( + exp_sio_seq, isl.dim_type.in_, "'") - _check_sio_for_stmt_pair(expected_sio, "stmt_a", "stmt_c", sched_maps) + _check_sio_for_stmt_pair(exp_sio_seq, "stmt_a", "stmt_c", sched_maps) # Relationship between stmt_a and stmt_d --------------------------------------- - expected_sio = isl.Map( + exp_sio_seq = isl.Map( "[pt, pi, pk] -> {{ " "[{0}'=0, i', k'] -> [{0}=1, t] : " "0 <= i' < pi and 0 <= k' < pk and 0 <= t < pt " "}}".format(STATEMENT_VAR_NAME) ) # isl ignores these apostrophes, so explicitly add them - expected_sio = append_marker_to_isl_map_var_names( - expected_sio, isl.dim_type.in_, "'") + exp_sio_seq = append_marker_to_isl_map_var_names( + exp_sio_seq, isl.dim_type.in_, "'") - _check_sio_for_stmt_pair(expected_sio, "stmt_a", "stmt_d", sched_maps) + _check_sio_for_stmt_pair(exp_sio_seq, "stmt_a", "stmt_d", sched_maps) 
# Relationship between stmt_b and stmt_c --------------------------------------- - expected_sio = isl.Map( + exp_sio_seq = isl.Map( "[pi, pj] -> {{ " "[{0}'=0, i', j'] -> [{0}=1, i, j] : " "0 <= i,i' < pi and 0 <= j,j' < pj and i > i'; " @@ -624,38 +621,38 @@ def test_statement_instance_ordering(): "}}".format(STATEMENT_VAR_NAME) ) # isl ignores these apostrophes, so explicitly add them - expected_sio = append_marker_to_isl_map_var_names( - expected_sio, isl.dim_type.in_, "'") + exp_sio_seq = append_marker_to_isl_map_var_names( + exp_sio_seq, isl.dim_type.in_, "'") - _check_sio_for_stmt_pair(expected_sio, "stmt_b", "stmt_c", sched_maps) + _check_sio_for_stmt_pair(exp_sio_seq, "stmt_b", "stmt_c", sched_maps) # Relationship between stmt_b and stmt_d --------------------------------------- - expected_sio = isl.Map( + exp_sio_seq = isl.Map( "[pt, pi, pj] -> {{ " "[{0}'=0, i', j'] -> [{0}=1, t] : " "0 <= i' < pi and 0 <= j' < pj and 0 <= t < pt " "}}".format(STATEMENT_VAR_NAME) ) # isl ignores these apostrophes, so explicitly add them - expected_sio = append_marker_to_isl_map_var_names( - expected_sio, isl.dim_type.in_, "'") + exp_sio_seq = append_marker_to_isl_map_var_names( + exp_sio_seq, isl.dim_type.in_, "'") - _check_sio_for_stmt_pair(expected_sio, "stmt_b", "stmt_d", sched_maps) + _check_sio_for_stmt_pair(exp_sio_seq, "stmt_b", "stmt_d", sched_maps) # Relationship between stmt_c and stmt_d --------------------------------------- - expected_sio = isl.Map( + exp_sio_seq = isl.Map( "[pt, pi, pj] -> {{ " "[{0}'=0, i', j'] -> [{0}=1, t] : " "0 <= i' < pi and 0 <= j' < pj and 0 <= t < pt " "}}".format(STATEMENT_VAR_NAME) ) # isl ignores these apostrophes, so explicitly add them - expected_sio = append_marker_to_isl_map_var_names( - expected_sio, isl.dim_type.in_, "'") + exp_sio_seq = append_marker_to_isl_map_var_names( + exp_sio_seq, isl.dim_type.in_, "'") - _check_sio_for_stmt_pair(expected_sio, "stmt_c", "stmt_d", sched_maps) + _check_sio_for_stmt_pair(exp_sio_seq, 
"stmt_c", "stmt_d", sched_maps) def test_statement_instance_ordering_with_hw_par_tags(): @@ -715,7 +712,7 @@ def test_statement_instance_ordering_with_hw_par_tags(): # Relationship between stmt_a and stmt_b --------------------------------------- - expected_sio = isl.Map( + exp_sio_seq = isl.Map( "[pi, pj] -> {{ " "[{0}'=0, i', ii', j', jj'] -> [{0}=1, i, ii, j, jj] : " "0 <= i,ii,i',ii' < pi and 0 <= j,jj,j',jj' < pj and ii >= ii' " @@ -726,10 +723,10 @@ def test_statement_instance_ordering_with_hw_par_tags(): ) ) # isl ignores these apostrophes, so explicitly add them - expected_sio = append_marker_to_isl_map_var_names( - expected_sio, isl.dim_type.in_, "'") + exp_sio_seq = append_marker_to_isl_map_var_names( + exp_sio_seq, isl.dim_type.in_, "'") - _check_sio_for_stmt_pair(expected_sio, "stmt_a", "stmt_b", sched_maps) + _check_sio_for_stmt_pair(exp_sio_seq, "stmt_a", "stmt_b", sched_maps) # ------------------------------------------------------------------------------ From 56cb55577bd239e97725026e758a727b0d5ca705 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Mon, 22 Mar 2021 12:23:02 -0500 Subject: [PATCH 188/460] rename blex related variables to lblex since they will need to be separated from (global) gblex stuff --- loopy/schedule/checker/schedule.py | 182 ++++++++++++++--------------- 1 file changed, 91 insertions(+), 91 deletions(-) diff --git a/loopy/schedule/checker/schedule.py b/loopy/schedule/checker/schedule.py index ec0efb9d8..1980705c4 100644 --- a/loopy/schedule/checker/schedule.py +++ b/loopy/schedule/checker/schedule.py @@ -302,7 +302,7 @@ def generate_pairwise_schedules( # (Could try to combine this with pass below but would make things messy) iname_bounds_pwaff = {} - blex_map_params = set() + lblex_map_params = set() for iname in loops_with_barriers: # Get first and last vals for this iname @@ -310,78 +310,78 @@ def generate_pairwise_schedules( ubound = bounds.upper_bound_pw_aff lbound = bounds.lower_bound_pw_aff iname_bounds_pwaff[iname] = 
(lbound, ubound) - blex_map_params |= set( + lblex_map_params |= set( lbound.get_var_names(dt.param) + ubound.get_var_names(dt.param)) - blex_map_params = sorted(blex_map_params) + lblex_map_params = sorted(lblex_map_params) # }}} # {{{ Construct blueprint for creating blex space and orderings # TODO combine this pass over the linearization items with the pass above - stmt_inst_to_blex = {} - subtract_map_blueprint = {} + stmt_inst_to_lblex = {} + lblex_exclusion_info = {} # Keep track of the next tuple of points in our blexicographic # ordering, initially this as a 1-d point with value 0 - next_blex_pt = [0] - n_blex_dims = 1 - iname_to_blexdim = {} + next_lblex_pt = [0] + n_lblex_dims = 1 + iname_to_lblex_dim = {} for linearization_item in linearization_items: if isinstance(linearization_item, EnterLoop): enter_iname = linearization_item.iname if enter_iname in loops_with_barriers: # update next blex pt - pre_loop_blex_pt = next_blex_pt[:] - next_blex_pt[-1] += 1 - next_blex_pt.append(enter_iname) - next_blex_pt.append(0) + pre_loop_lblex_pt = next_lblex_pt[:] + next_lblex_pt[-1] += 1 + next_lblex_pt.append(enter_iname) + next_lblex_pt.append(0) # store tuples that will be used to create pairs # that will later be subtracted from happens-before map - first_iter_blex_pt = next_blex_pt[:] - first_iter_blex_pt[-2] = iname_bounds_pwaff[enter_iname][0] - subtract_map_blueprint[enter_iname] = { - PRE: tuple(pre_loop_blex_pt), # make sure to copy - TOP: tuple(next_blex_pt), # make sure to copy - FIRST: tuple(first_iter_blex_pt), # make sure to copy + first_iter_lblex_pt = next_lblex_pt[:] + first_iter_lblex_pt[-2] = iname_bounds_pwaff[enter_iname][0] + lblex_exclusion_info[enter_iname] = { + PRE: tuple(pre_loop_lblex_pt), # make sure to copy + TOP: tuple(next_lblex_pt), # make sure to copy + FIRST: tuple(first_iter_lblex_pt), # make sure to copy } elif isinstance(linearization_item, LeaveLoop): leave_iname = linearization_item.iname if leave_iname in 
loops_with_barriers: # update max blex dims - n_blex_dims = max(n_blex_dims, len(next_blex_pt)) - iname_to_blexdim[leave_iname] = len(next_blex_pt)-2 + n_lblex_dims = max(n_lblex_dims, len(next_lblex_pt)) + iname_to_lblex_dim[leave_iname] = len(next_lblex_pt)-2 # update next blex pt - pre_end_loop_blex_pt = next_blex_pt[:] - next_blex_pt.pop() - next_blex_pt.pop() - next_blex_pt[-1] += 1 + pre_end_loop_lblex_pt = next_lblex_pt[:] + next_lblex_pt.pop() + next_lblex_pt.pop() + next_lblex_pt[-1] += 1 # store tuples that will be used to create pairs # that will later be subtracted from happens-before map - last_iter_blex_pt = pre_end_loop_blex_pt[:] - last_iter_blex_pt[-2] = iname_bounds_pwaff[leave_iname][1] - subtract_map_blueprint[leave_iname][BOTTOM] = tuple( - pre_end_loop_blex_pt) - subtract_map_blueprint[leave_iname][LAST] = tuple(last_iter_blex_pt) - subtract_map_blueprint[leave_iname][POST] = tuple(next_blex_pt) + last_iter_lblex_pt = pre_end_loop_lblex_pt[:] + last_iter_lblex_pt[-2] = iname_bounds_pwaff[leave_iname][1] + lblex_exclusion_info[leave_iname][BOTTOM] = tuple( + pre_end_loop_lblex_pt) + lblex_exclusion_info[leave_iname][LAST] = tuple(last_iter_lblex_pt) + lblex_exclusion_info[leave_iname][POST] = tuple(next_lblex_pt) # (make sure ^these are copies) elif isinstance(linearization_item, RunInstruction): - # Add item to stmt_inst_to_blex + # Add item to stmt_inst_to_lblex lp_insn_id = linearization_item.insn_id - stmt_inst_to_blex[lp_insn_id] = tuple(next_blex_pt) + stmt_inst_to_lblex[lp_insn_id] = tuple(next_lblex_pt) # Don't increment blex dim val elif isinstance(linearization_item, Barrier): - next_blex_pt[-1] += 1 + next_lblex_pt[-1] += 1 else: from loopy.schedule import (CallKernel, ReturnFromKernel) @@ -393,43 +393,43 @@ def generate_pairwise_schedules( # }}} # pad tuples w/zeros - for stmt, tup in stmt_inst_to_blex.items(): - stmt_inst_to_blex[stmt] = _pad_tuple_with_zeros(tup, n_blex_dims) + for stmt, tup in stmt_inst_to_lblex.items(): + 
stmt_inst_to_lblex[stmt] = _pad_tuple_with_zeros(tup, n_lblex_dims) # Create names for the blex dimensions for sequential loops from loopy.schedule.checker.utils import ( append_marker_to_strings, ) - seq_blex_dim_names = [ - BLEX_VAR_PREFIX+str(i) for i in range(n_blex_dims)] - seq_blex_dim_names_prime = append_marker_to_strings( - seq_blex_dim_names, marker=BEFORE_MARK) - - blex_order_map = create_lex_order_map( - before_names=seq_blex_dim_names_prime, - after_names=seq_blex_dim_names, + seq_lblex_dim_names = [ + BLEX_VAR_PREFIX+str(i) for i in range(n_lblex_dims)] + seq_lblex_dim_names_prime = append_marker_to_strings( + seq_lblex_dim_names, marker=BEFORE_MARK) + + lblex_order_map = create_lex_order_map( + before_names=seq_lblex_dim_names_prime, + after_names=seq_lblex_dim_names, after_names_concurrent=conc_lex_dim_names, conc_var_comparison_op="ne", in_dim_marker=BEFORE_MARK, ) - iname_to_blexvar = {} - for iname, dim in iname_to_blexdim.items(): - iname_to_blexvar[iname] = seq_blex_dim_names[dim] - iname_to_blexvar[iname+BEFORE_MARK] = seq_blex_dim_names_prime[dim] + iname_to_lblex_var = {} + for iname, dim in iname_to_lblex_dim.items(): + iname_to_lblex_var[iname] = seq_lblex_dim_names[dim] + iname_to_lblex_var[iname+BEFORE_MARK] = seq_lblex_dim_names_prime[dim] # Add params to blex map - blex_order_map = blex_order_map.add_dims(dt.param, len(blex_map_params)) - for i, p in enumerate(blex_map_params): - blex_order_map = blex_order_map.set_dim_name(dt.param, i, p) + lblex_order_map = lblex_order_map.add_dims(dt.param, len(lblex_map_params)) + for i, p in enumerate(lblex_map_params): + lblex_order_map = lblex_order_map.set_dim_name(dt.param, i, p) # get a set representing blex_order_map space - blex_set_template = isl.align_spaces( - isl.Map("[ ] -> { [ ] -> [ ] }"), blex_order_map + lblex_set_template = isl.align_spaces( + isl.Map("[ ] -> { [ ] -> [ ] }"), lblex_order_map ).move_dims( - dt.in_, n_blex_dims, dt.out, 0, n_blex_dims + dt.in_, n_lblex_dims, 
dt.out, 0, n_lblex_dims ).domain() - blex_set_affs = isl.affs_from_space(blex_set_template.space) + lblex_set_affs = isl.affs_from_space(lblex_set_template.space) def _create_subtraction_map_for_iname(iname, blueprint): # Note: blueprint[FIRST] and blueprint[LAST] contain pwaffs @@ -437,67 +437,67 @@ def _create_subtraction_map_for_iname(iname, blueprint): def _create_blex_set_from_tuple_pair(before, after, wrap_cond=False): # start with a set representing blex_order_map space - blex_set = blex_set_template.copy() + lblex_set = lblex_set_template.copy() # add markers to inames in before tuple # (assume strings are the inames) before_prime = tuple( v+BEFORE_MARK if isinstance(v, str) else v for v in before) - before_padded = _pad_tuple_with_zeros(before_prime, n_blex_dims) - after_padded = _pad_tuple_with_zeros(after, n_blex_dims) + before_padded = _pad_tuple_with_zeros(before_prime, n_lblex_dims) + after_padded = _pad_tuple_with_zeros(after, n_lblex_dims) # assign vals to dims for dim_name, dim_val in zip( - seq_blex_dim_names_prime+seq_blex_dim_names, + seq_lblex_dim_names_prime+seq_lblex_dim_names, before_padded+after_padded): # (could exploit knowledge of content types of odd/even # tuple dims to reduce conditionals but would be ugly # and less robust) if isinstance(dim_val, int): # set idx to int val - blex_set &= blex_set_affs[dim_name].eq_set( - blex_set_affs[0]+dim_val) + lblex_set &= lblex_set_affs[dim_name].eq_set( + lblex_set_affs[0]+dim_val) elif isinstance(dim_val, str): # assume this is an iname, set idx to corresponding blex var - blex_set &= blex_set_affs[dim_name].eq_set( - blex_set_affs[iname_to_blexvar[dim_val]]) + lblex_set &= lblex_set_affs[dim_name].eq_set( + lblex_set_affs[iname_to_lblex_var[dim_val]]) else: assert isinstance(dim_val, isl.PwAff) - pwaff_aligned = isl.align_spaces(dim_val, blex_set_affs[0]) - # (doesn't matter which element of blex_set_affs we use^) - blex_set &= blex_set_affs[dim_name].eq_set(pwaff_aligned) + pwaff_aligned = 
isl.align_spaces(dim_val, lblex_set_affs[0]) + # (doesn't matter which element of lblex_set_affs we use^) + lblex_set &= lblex_set_affs[dim_name].eq_set(pwaff_aligned) if wrap_cond: # i = i' + step # TODO what about step sizes != 1? - blex_set &= blex_set_affs[iname_to_blexvar[iname]].eq_set( - blex_set_affs[iname_to_blexvar[iname+BEFORE_MARK]] + 1) + lblex_set &= lblex_set_affs[iname_to_lblex_var[iname]].eq_set( + lblex_set_affs[iname_to_lblex_var[iname+BEFORE_MARK]] + 1) - return blex_set + return lblex_set # enter loop case - full_blex_set = _create_blex_set_from_tuple_pair( + full_lblex_set = _create_blex_set_from_tuple_pair( blueprint[PRE], blueprint[FIRST]) # wrap loop case - full_blex_set |= _create_blex_set_from_tuple_pair( + full_lblex_set |= _create_blex_set_from_tuple_pair( blueprint[BOTTOM], blueprint[TOP], wrap_cond=True) # leave loop case - full_blex_set |= _create_blex_set_from_tuple_pair( + full_lblex_set |= _create_blex_set_from_tuple_pair( blueprint[LAST], blueprint[POST]) # add cond to fix iteration value for surrounding loops (i = i') for surrounding_iname in blueprint[PRE][1::2]: - s_blex_var = iname_to_blexvar[surrounding_iname] - full_blex_set &= blex_set_affs[s_blex_var].eq_set( - blex_set_affs[s_blex_var+BEFORE_MARK]) + s_lblex_var = iname_to_lblex_var[surrounding_iname] + full_lblex_set &= lblex_set_affs[s_lblex_var].eq_set( + lblex_set_affs[s_lblex_var+BEFORE_MARK]) # convert blex set back to map - return isl.Map.from_domain(full_blex_set).move_dims( - dt.out, 0, dt.in_, n_blex_dims, n_blex_dims) + return isl.Map.from_domain(full_lblex_set).move_dims( + dt.out, 0, dt.in_, n_lblex_dims, n_lblex_dims) # subtract unwanted pairs from happens-before blex map maps_to_subtract = [] - for iname, subdict in subtract_map_blueprint.items(): + for iname, subdict in lblex_exclusion_info.items(): maps_to_subtract.append(_create_subtraction_map_for_iname(iname, subdict)) if maps_to_subtract: @@ -511,7 +511,7 @@ def 
_create_blex_set_from_tuple_pair(before, after, wrap_cond=False): assert closure_exact # TODO warn instead # subtract from blex order map - blex_order_map = blex_order_map - map_to_subtract + lblex_order_map = lblex_order_map - map_to_subtract # }}} end blex order/map machinery @@ -635,32 +635,32 @@ def _get_map_for_stmt( # NOTE: use *unsimplified* lex tuples with blex map - blex_tuples = [stmt_inst_to_blex[insn_id] for insn_id in insn_ids] + lblex_tuples = [stmt_inst_to_lblex[insn_id] for insn_id in insn_ids] # At this point, one of the lex tuples may have more dimensions than another; # the missing dims are the fastest-updating dims, and their values should # be zero. Add them. - max_blex_dims = max([len(blex_tuple) for blex_tuple in blex_tuples]) - blex_tuples_padded = [ - _pad_tuple_with_zeros(blex_tuple, max_blex_dims) - for blex_tuple in blex_tuples] + max_lblex_dims = max([len(lblex_tuple) for lblex_tuple in lblex_tuples]) + lblex_tuples_padded = [ + _pad_tuple_with_zeros(lblex_tuple, max_lblex_dims) + for lblex_tuple in lblex_tuples] # Create names for the output dimensions for sequential loops - seq_blex_dim_names = [ - BLEX_VAR_PREFIX+str(i) for i in range(len(blex_tuples_padded[0]))] + seq_lblex_dim_names = [ + BLEX_VAR_PREFIX+str(i) for i in range(len(lblex_tuples_padded[0]))] lconc_sched_maps = [ _get_map_for_stmt( - insn_id, blex_tuple, int_sid, - seq_blex_dim_names+conc_lex_dim_names) # conc dim names same for all - for insn_id, blex_tuple, int_sid - in zip(insn_ids, blex_tuples_padded, int_sids) + insn_id, lblex_tuple, int_sid, + seq_lblex_dim_names+conc_lex_dim_names) # conc names same for all + for insn_id, lblex_tuple, int_sid + in zip(insn_ids, lblex_tuples_padded, int_sids) ] # Create statement instance ordering sio_lconc = get_statement_ordering_map( *lconc_sched_maps, # note, func accepts exactly two maps - blex_order_map, + lblex_order_map, before_marker=BEFORE_MARK, ) @@ -668,7 +668,7 @@ def _get_map_for_stmt( # TODO #sio_gconc = 
get_statement_ordering_map( # *gconc_sched_maps, # note, func accepts exactly two maps - # g_blex_order_map, + # gblex_order_map, # before_marker=BEFORE_MARK, # ) From 111ed536c790335e1a356df4ab61736f858e39a3 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Mon, 22 Mar 2021 15:30:12 -0500 Subject: [PATCH 189/460] return sched maps for both lex and lblex orderings; collect params for lblex maps during instruction pass instead of before --- loopy/schedule/checker/schedule.py | 55 +++++++------ test/test_linearization_checker.py | 128 ++++++++++++++++++----------- 2 files changed, 110 insertions(+), 73 deletions(-) diff --git a/loopy/schedule/checker/schedule.py b/loopy/schedule/checker/schedule.py index 1980705c4..7f83c4032 100644 --- a/loopy/schedule/checker/schedule.py +++ b/loopy/schedule/checker/schedule.py @@ -284,7 +284,8 @@ def generate_pairwise_schedules( # {{{ Determine which loops contain barriers - loops_with_barriers = set() + loops_with_lbarriers = set() + loops_with_gbarriers = set() current_inames = set() for linearization_item in linearization_items: @@ -293,7 +294,10 @@ def generate_pairwise_schedules( elif isinstance(linearization_item, LeaveLoop): current_inames.remove(linearization_item.iname) elif isinstance(linearization_item, Barrier): - loops_with_barriers |= current_inames + if linearization_item.synchronization_kind == "local": + loops_with_lbarriers |= current_inames + elif linearization_item.synchronization_kind == "global": + loops_with_gbarriers |= current_inames # At this point we could technically skip ahead to next enterloop # }}} @@ -302,37 +306,29 @@ def generate_pairwise_schedules( # (Could try to combine this with pass below but would make things messy) iname_bounds_pwaff = {} - lblex_map_params = set() - - for iname in loops_with_barriers: + for iname in loops_with_lbarriers: # Get first and last vals for this iname bounds = knl.get_iname_bounds(iname) - ubound = bounds.upper_bound_pw_aff - lbound = bounds.lower_bound_pw_aff - 
iname_bounds_pwaff[iname] = (lbound, ubound) - lblex_map_params |= set( - lbound.get_var_names(dt.param) + ubound.get_var_names(dt.param)) - - lblex_map_params = sorted(lblex_map_params) + iname_bounds_pwaff[iname] = ( + bounds.lower_bound_pw_aff, bounds.upper_bound_pw_aff) # }}} # {{{ Construct blueprint for creating blex space and orderings # TODO combine this pass over the linearization items with the pass above - stmt_inst_to_lblex = {} - lblex_exclusion_info = {} - - # Keep track of the next tuple of points in our blexicographic - # ordering, initially this as a 1-d point with value 0 - next_lblex_pt = [0] - n_lblex_dims = 1 - iname_to_lblex_dim = {} + stmt_inst_to_lblex = {} # map stmt instances to lblex space + iname_to_lblex_dim = {} # map from inames to corresponding lblex space dim + lblex_exclusion_info = {} # info for creating pairs to subtract from lblex order + lblex_map_params = set() # params needed in lblex map + next_lblex_pt = [0] # next tuple of points in lblex order + n_lblex_dims = 1 # number of dims in lblex space + # do both lblex and gblex processing in single pass through insns for linearization_item in linearization_items: if isinstance(linearization_item, EnterLoop): enter_iname = linearization_item.iname - if enter_iname in loops_with_barriers: + if enter_iname in loops_with_lbarriers: # update next blex pt pre_loop_lblex_pt = next_lblex_pt[:] next_lblex_pt[-1] += 1 @@ -341,17 +337,19 @@ def generate_pairwise_schedules( # store tuples that will be used to create pairs # that will later be subtracted from happens-before map + lbound = iname_bounds_pwaff[enter_iname][0] first_iter_lblex_pt = next_lblex_pt[:] - first_iter_lblex_pt[-2] = iname_bounds_pwaff[enter_iname][0] + first_iter_lblex_pt[-2] = lbound lblex_exclusion_info[enter_iname] = { PRE: tuple(pre_loop_lblex_pt), # make sure to copy TOP: tuple(next_lblex_pt), # make sure to copy FIRST: tuple(first_iter_lblex_pt), # make sure to copy } + lblex_map_params |= 
set(lbound.get_var_names(dt.param)) elif isinstance(linearization_item, LeaveLoop): leave_iname = linearization_item.iname - if leave_iname in loops_with_barriers: + if leave_iname in loops_with_lbarriers: # update max blex dims n_lblex_dims = max(n_lblex_dims, len(next_lblex_pt)) iname_to_lblex_dim[leave_iname] = len(next_lblex_pt)-2 @@ -364,13 +362,15 @@ def generate_pairwise_schedules( # store tuples that will be used to create pairs # that will later be subtracted from happens-before map + ubound = iname_bounds_pwaff[leave_iname][1] last_iter_lblex_pt = pre_end_loop_lblex_pt[:] - last_iter_lblex_pt[-2] = iname_bounds_pwaff[leave_iname][1] + last_iter_lblex_pt[-2] = ubound lblex_exclusion_info[leave_iname][BOTTOM] = tuple( pre_end_loop_lblex_pt) lblex_exclusion_info[leave_iname][LAST] = tuple(last_iter_lblex_pt) lblex_exclusion_info[leave_iname][POST] = tuple(next_lblex_pt) # (make sure ^these are copies) + lblex_map_params |= set(ubound.get_var_names(dt.param)) elif isinstance(linearization_item, RunInstruction): # Add item to stmt_inst_to_lblex @@ -390,6 +390,8 @@ def generate_pairwise_schedules( linearization_item, (CallKernel, ReturnFromKernel)) pass + lblex_map_params = sorted(lblex_map_params) + # }}} # pad tuples w/zeros @@ -674,8 +676,11 @@ def _get_map_for_stmt( # }}} + # TODO don't return sched maps? 
#pairwise_schedules[tuple(insn_ids)] = tuple(intra_thread_sched_maps) pairwise_schedules[tuple(insn_ids)] = ( - sio_seq, sio_lconc, tuple(intra_thread_sched_maps)) + (sio_seq, tuple(intra_thread_sched_maps), ), + (sio_lconc, tuple(lconc_sched_maps), ) + ) return pairwise_schedules diff --git a/test/test_linearization_checker.py b/test/test_linearization_checker.py index caaa7bb43..385f83b15 100644 --- a/test/test_linearization_checker.py +++ b/test/test_linearization_checker.py @@ -129,7 +129,7 @@ def test_pairwise_schedule_creation(): ("insn_b", "insn_d"), ("insn_c", "insn_d"), ] - sched_maps = get_schedules_for_statement_pairs( + scheds = get_schedules_for_statement_pairs( lin_knl, linearization_items, insn_id_pairs, @@ -138,12 +138,16 @@ def test_pairwise_schedule_creation(): # Relationship between insn_a and insn_b --------------------------------------- # Get two maps - sio_seq, sio_lconc, (sched_map_before, sched_map_after) = sched_maps[ + ( + sio_seq, (sched_before, sched_after) + ), ( + sio_lconc, (lconc_sched_before, lconc_sched_after) + ) = scheds[ ("insn_a", "insn_b")] # Create expected maps and compare - sched_map_before_exp = isl.Map( + sched_before_exp = isl.Map( "[pi, pk] -> { [%s=0, i, k] -> [%s] : 0 <= i < pi and 0 <= k < pk }" % ( STATEMENT_VAR_NAME, @@ -151,7 +155,7 @@ def test_pairwise_schedule_creation(): ) ) - sched_map_after_exp = isl.Map( + sched_after_exp = isl.Map( "[pi, pj] -> { [%s=1, i, j] -> [%s] : 0 <= i < pi and 0 <= j < pj }" % ( STATEMENT_VAR_NAME, @@ -160,20 +164,24 @@ def test_pairwise_schedule_creation(): ) _align_and_compare_maps( - [sched_map_before_exp, sched_map_after_exp], - [sched_map_before, sched_map_after], + [sched_before_exp, sched_after_exp], + [sched_before, sched_after], ) # ------------------------------------------------------------------------------ # Relationship between insn_a and insn_c --------------------------------------- # Get two maps - sio_seq, sio_lconc, (sched_map_before, sched_map_after) = 
sched_maps[ + ( + sio_seq, (sched_before, sched_after) + ), ( + sio_lconc, (lconc_sched_before, lconc_sched_after) + ) = scheds[ ("insn_a", "insn_c")] # Create expected maps and compare - sched_map_before_exp = isl.Map( + sched_before_exp = isl.Map( "[pi, pk] -> { [%s=0, i, k] -> [%s] : 0 <= i < pi and 0 <= k < pk }" % ( STATEMENT_VAR_NAME, @@ -181,7 +189,7 @@ def test_pairwise_schedule_creation(): ) ) - sched_map_after_exp = isl.Map( + sched_after_exp = isl.Map( "[pi, pj] -> { [%s=1, i, j] -> [%s] : 0 <= i < pi and 0 <= j < pj }" % ( STATEMENT_VAR_NAME, @@ -190,20 +198,24 @@ def test_pairwise_schedule_creation(): ) _align_and_compare_maps( - [sched_map_before_exp, sched_map_after_exp], - [sched_map_before, sched_map_after], + [sched_before_exp, sched_after_exp], + [sched_before, sched_after], ) # ------------------------------------------------------------------------------ # Relationship between insn_a and insn_d --------------------------------------- # Get two maps - sio_seq, sio_lconc, (sched_map_before, sched_map_after) = sched_maps[ + ( + sio_seq, (sched_before, sched_after) + ), ( + sio_lconc, (lconc_sched_before, lconc_sched_after) + ) = scheds[ ("insn_a", "insn_d")] # Create expected maps and compare - sched_map_before_exp = isl.Map( + sched_before_exp = isl.Map( "[pi, pk] -> { [%s=0, i, k] -> [%s] : 0 <= i < pi and 0 <= k < pk }" % ( STATEMENT_VAR_NAME, @@ -211,7 +223,7 @@ def test_pairwise_schedule_creation(): ) ) - sched_map_after_exp = isl.Map( + sched_after_exp = isl.Map( "[pt] -> { [%s=1, t] -> [%s] : 0 <= t < pt }" % ( STATEMENT_VAR_NAME, @@ -220,20 +232,24 @@ def test_pairwise_schedule_creation(): ) _align_and_compare_maps( - [sched_map_before_exp, sched_map_after_exp], - [sched_map_before, sched_map_after], + [sched_before_exp, sched_after_exp], + [sched_before, sched_after], ) # ------------------------------------------------------------------------------ # Relationship between insn_b and insn_c --------------------------------------- # Get two 
maps - sio_seq, sio_lconc, (sched_map_before, sched_map_after) = sched_maps[ + ( + sio_seq, (sched_before, sched_after) + ), ( + sio_lconc, (lconc_sched_before, lconc_sched_after) + ) = scheds[ ("insn_b", "insn_c")] # Create expected maps and compare - sched_map_before_exp = isl.Map( + sched_before_exp = isl.Map( "[pi, pj] -> { [%s=0, i, j] -> [%s] : 0 <= i < pi and 0 <= j < pj }" % ( STATEMENT_VAR_NAME, @@ -241,7 +257,7 @@ def test_pairwise_schedule_creation(): ) ) - sched_map_after_exp = isl.Map( + sched_after_exp = isl.Map( "[pi, pj] -> { [%s=1, i, j] -> [%s] : 0 <= i < pi and 0 <= j < pj }" % ( STATEMENT_VAR_NAME, @@ -250,20 +266,24 @@ def test_pairwise_schedule_creation(): ) _align_and_compare_maps( - [sched_map_before_exp, sched_map_after_exp], - [sched_map_before, sched_map_after], + [sched_before_exp, sched_after_exp], + [sched_before, sched_after], ) # ------------------------------------------------------------------------------ # Relationship between insn_b and insn_d --------------------------------------- # Get two maps - sio_seq, sio_lconc, (sched_map_before, sched_map_after) = sched_maps[ + ( + sio_seq, (sched_before, sched_after) + ), ( + sio_lconc, (lconc_sched_before, lconc_sched_after) + ) = scheds[ ("insn_b", "insn_d")] # Create expected maps and compare - sched_map_before_exp = isl.Map( + sched_before_exp = isl.Map( "[pi, pj] -> { [%s=0, i, j] -> [%s] : 0 <= i < pi and 0 <= j < pj }" % ( STATEMENT_VAR_NAME, @@ -271,7 +291,7 @@ def test_pairwise_schedule_creation(): ) ) - sched_map_after_exp = isl.Map( + sched_after_exp = isl.Map( "[pt] -> { [%s=1, t] -> [%s] : 0 <= t < pt }" % ( STATEMENT_VAR_NAME, @@ -280,20 +300,24 @@ def test_pairwise_schedule_creation(): ) _align_and_compare_maps( - [sched_map_before_exp, sched_map_after_exp], - [sched_map_before, sched_map_after], + [sched_before_exp, sched_after_exp], + [sched_before, sched_after], ) # ------------------------------------------------------------------------------ # Relationship between 
insn_c and insn_d --------------------------------------- # Get two maps - sio_seq, sio_lconc, (sched_map_before, sched_map_after) = sched_maps[ + ( + sio_seq, (sched_before, sched_after) + ), ( + sio_lconc, (lconc_sched_before, lconc_sched_after) + ) = scheds[ ("insn_c", "insn_d")] # Create expected maps and compare - sched_map_before_exp = isl.Map( + sched_before_exp = isl.Map( "[pi, pj] -> { [%s=0, i, j] -> [%s] : 0 <= i < pi and 0 <= j < pj }" % ( STATEMENT_VAR_NAME, @@ -301,7 +325,7 @@ def test_pairwise_schedule_creation(): ) ) - sched_map_after_exp = isl.Map( + sched_after_exp = isl.Map( "[pt] -> { [%s=1, t] -> [%s] : 0 <= t < pt }" % ( STATEMENT_VAR_NAME, @@ -310,8 +334,8 @@ def test_pairwise_schedule_creation(): ) _align_and_compare_maps( - [sched_map_before_exp, sched_map_after_exp], - [sched_map_before, sched_map_after], + [sched_before_exp, sched_after_exp], + [sched_before, sched_after], ) @@ -354,7 +378,7 @@ def test_pairwise_schedule_creation_with_hw_par_tags(): stmt_id_pairs = [ ("stmt_a", "stmt_b"), ] - sched_maps = get_schedules_for_statement_pairs( + scheds = get_schedules_for_statement_pairs( lin_knl, linearization_items, stmt_id_pairs, @@ -363,12 +387,16 @@ def test_pairwise_schedule_creation_with_hw_par_tags(): # Relationship between stmt_a and stmt_b --------------------------------------- # Get two maps - sio_seq, sio_lconc, (sched_map_before, sched_map_after) = sched_maps[ + ( + sio_seq, (sched_before, sched_after) + ), ( + sio_lconc, (lconc_sched_before, lconc_sched_after) + ) = scheds[ ("stmt_a", "stmt_b")] # Create expected maps and compare - sched_map_before_exp = isl.Map( + sched_before_exp = isl.Map( "[pi,pj] -> {[%s=0,i,ii,j,jj] -> [%s] : 0 <= i,ii < pi and 0 <= j,jj < pj}" % ( STATEMENT_VAR_NAME, @@ -376,7 +404,7 @@ def test_pairwise_schedule_creation_with_hw_par_tags(): ) ) - sched_map_after_exp = isl.Map( + sched_after_exp = isl.Map( "[pi,pj] -> {[%s=1,i,ii,j,jj] -> [%s] : 0 <= i,ii < pi and 0 <= j,jj < pj}" % ( STATEMENT_VAR_NAME, 
@@ -385,8 +413,8 @@ def test_pairwise_schedule_creation_with_hw_par_tags(): ) _align_and_compare_maps( - [sched_map_before_exp, sched_map_after_exp], - [sched_map_before, sched_map_after], + [sched_before_exp, sched_after_exp], + [sched_before, sched_after], ) # ------------------------------------------------------------------------------ @@ -492,14 +520,18 @@ def _check_sio_for_stmt_pair( exp_sio, stmt_id_before, stmt_id_after, - sched_maps, + scheds, ): from loopy.schedule.checker.utils import ( ensure_dim_names_match_and_align, ) # Get pairwise schedule - sio_seq, sio_lconc, (sched_map_before, sched_map_after) = sched_maps[ + ( + sio_seq, (sched_before, sched_after) + ), ( + sio_lconc, (lconc_sched_before, lconc_sched_after) + ) = scheds[ (stmt_id_before, stmt_id_after)] sio_seq_aligned = ensure_dim_names_match_and_align(sio_seq, exp_sio) @@ -562,7 +594,7 @@ def test_statement_instance_ordering(): ("stmt_b", "stmt_d"), ("stmt_c", "stmt_d"), ] - sched_maps = get_schedules_for_statement_pairs( + scheds = get_schedules_for_statement_pairs( knl, linearization_items, stmt_id_pairs, @@ -580,7 +612,7 @@ def test_statement_instance_ordering(): exp_sio_seq = append_marker_to_isl_map_var_names( exp_sio_seq, isl.dim_type.in_, "'") - _check_sio_for_stmt_pair(exp_sio_seq, "stmt_a", "stmt_b", sched_maps) + _check_sio_for_stmt_pair(exp_sio_seq, "stmt_a", "stmt_b", scheds) # Relationship between stmt_a and stmt_c --------------------------------------- @@ -594,7 +626,7 @@ def test_statement_instance_ordering(): exp_sio_seq = append_marker_to_isl_map_var_names( exp_sio_seq, isl.dim_type.in_, "'") - _check_sio_for_stmt_pair(exp_sio_seq, "stmt_a", "stmt_c", sched_maps) + _check_sio_for_stmt_pair(exp_sio_seq, "stmt_a", "stmt_c", scheds) # Relationship between stmt_a and stmt_d --------------------------------------- @@ -608,7 +640,7 @@ def test_statement_instance_ordering(): exp_sio_seq = append_marker_to_isl_map_var_names( exp_sio_seq, isl.dim_type.in_, "'") - 
_check_sio_for_stmt_pair(exp_sio_seq, "stmt_a", "stmt_d", sched_maps) + _check_sio_for_stmt_pair(exp_sio_seq, "stmt_a", "stmt_d", scheds) # Relationship between stmt_b and stmt_c --------------------------------------- @@ -624,7 +656,7 @@ def test_statement_instance_ordering(): exp_sio_seq = append_marker_to_isl_map_var_names( exp_sio_seq, isl.dim_type.in_, "'") - _check_sio_for_stmt_pair(exp_sio_seq, "stmt_b", "stmt_c", sched_maps) + _check_sio_for_stmt_pair(exp_sio_seq, "stmt_b", "stmt_c", scheds) # Relationship between stmt_b and stmt_d --------------------------------------- @@ -638,7 +670,7 @@ def test_statement_instance_ordering(): exp_sio_seq = append_marker_to_isl_map_var_names( exp_sio_seq, isl.dim_type.in_, "'") - _check_sio_for_stmt_pair(exp_sio_seq, "stmt_b", "stmt_d", sched_maps) + _check_sio_for_stmt_pair(exp_sio_seq, "stmt_b", "stmt_d", scheds) # Relationship between stmt_c and stmt_d --------------------------------------- @@ -652,7 +684,7 @@ def test_statement_instance_ordering(): exp_sio_seq = append_marker_to_isl_map_var_names( exp_sio_seq, isl.dim_type.in_, "'") - _check_sio_for_stmt_pair(exp_sio_seq, "stmt_c", "stmt_d", sched_maps) + _check_sio_for_stmt_pair(exp_sio_seq, "stmt_c", "stmt_d", scheds) def test_statement_instance_ordering_with_hw_par_tags(): @@ -699,7 +731,7 @@ def test_statement_instance_ordering_with_hw_par_tags(): stmt_id_pairs = [ ("stmt_a", "stmt_b"), ] - sched_maps = get_schedules_for_statement_pairs( + scheds = get_schedules_for_statement_pairs( lin_knl, linearization_items, stmt_id_pairs, @@ -726,7 +758,7 @@ def test_statement_instance_ordering_with_hw_par_tags(): exp_sio_seq = append_marker_to_isl_map_var_names( exp_sio_seq, isl.dim_type.in_, "'") - _check_sio_for_stmt_pair(exp_sio_seq, "stmt_a", "stmt_b", sched_maps) + _check_sio_for_stmt_pair(exp_sio_seq, "stmt_a", "stmt_b", scheds) # ------------------------------------------------------------------------------ From 0c3890d28e63d32301ac9d2cff0665a7016d4354 Mon Sep 17 
00:00:00 2001 From: jdsteve2 Date: Tue, 23 Mar 2021 11:10:34 -0500 Subject: [PATCH 190/460] (WIP) create separate global barrier sio map --- loopy/schedule/checker/schedule.py | 515 ++++++++++++++++++----------- test/test_linearization_checker.py | 16 + 2 files changed, 346 insertions(+), 185 deletions(-) diff --git a/loopy/schedule/checker/schedule.py b/loopy/schedule/checker/schedule.py index 7f83c4032..6212b5e44 100644 --- a/loopy/schedule/checker/schedule.py +++ b/loopy/schedule/checker/schedule.py @@ -122,7 +122,7 @@ def _simplify_lex_dims(tup0, tup1): def generate_pairwise_schedules( knl, - linearization_items, + lin_items, insn_id_pairs, loops_to_ignore=set(), ): @@ -137,7 +137,7 @@ def generate_pairwise_schedules( kernel will be used to get the domains associated with the inames used in the statements. - :arg linearization_items: A list of :class:`loopy.schedule.ScheduleItem` + :arg lin_items: A list of :class:`loopy.schedule.ScheduleItem` (to be renamed to `loopy.schedule.LinearizationItem`) containing all linearization items for which pairwise schedules will be created. 
To allow usage of this routine during linearization, a @@ -168,7 +168,7 @@ def generate_pairwise_schedules( all_insn_ids = set().union(*insn_id_pairs) - # First, use one pass through linearization_items to generate a lexicographic + # First, use one pass through lin_items to generate a lexicographic # ordering describing the relative order of *all* statements represented by # all_insn_ids @@ -181,9 +181,9 @@ def generate_pairwise_schedules( # ordering, initially this as a 1-d point with value 0 next_insn_lex_tuple = [0] - for linearization_item in linearization_items: - if isinstance(linearization_item, EnterLoop): - iname = linearization_item.iname + for lin_item in lin_items: + if isinstance(lin_item, EnterLoop): + iname = lin_item.iname if iname in loops_to_ignore: continue @@ -199,8 +199,8 @@ def generate_pairwise_schedules( next_insn_lex_tuple.append(iname) next_insn_lex_tuple.append(0) - elif isinstance(linearization_item, LeaveLoop): - if linearization_item.iname in loops_to_ignore: + elif isinstance(lin_item, LeaveLoop): + if lin_item.iname in loops_to_ignore: continue # Upon leaving a loop, @@ -217,14 +217,14 @@ def generate_pairwise_schedules( # in the simplification step below) next_insn_lex_tuple[-1] += 1 - elif isinstance(linearization_item, (RunInstruction, Barrier)): + elif isinstance(lin_item, (RunInstruction, Barrier)): from loopy.schedule.checker.utils import ( get_insn_id_from_linearization_item, ) - lp_insn_id = get_insn_id_from_linearization_item(linearization_item) + lp_insn_id = get_insn_id_from_linearization_item(lin_item) if lp_insn_id is None: - assert isinstance(linearization_item, Barrier) + assert isinstance(lin_item, Barrier) # Barriers without insn ids were inserted as a result of a # dependency. They don't themselves have dependencies. Ignore them. 
@@ -247,7 +247,7 @@ def generate_pairwise_schedules( from loopy.schedule import (CallKernel, ReturnFromKernel) # No action needed for these types of linearization item assert isinstance( - linearization_item, (CallKernel, ReturnFromKernel)) + lin_item, (CallKernel, ReturnFromKernel)) pass # To save time, stop when we've found all statements @@ -284,20 +284,16 @@ def generate_pairwise_schedules( # {{{ Determine which loops contain barriers - loops_with_lbarriers = set() - loops_with_gbarriers = set() + loops_with_barriers = {"local": set(), "global": set()} current_inames = set() - for linearization_item in linearization_items: - if isinstance(linearization_item, EnterLoop): - current_inames.add(linearization_item.iname) - elif isinstance(linearization_item, LeaveLoop): - current_inames.remove(linearization_item.iname) - elif isinstance(linearization_item, Barrier): - if linearization_item.synchronization_kind == "local": - loops_with_lbarriers |= current_inames - elif linearization_item.synchronization_kind == "global": - loops_with_gbarriers |= current_inames + for lin_item in lin_items: + if isinstance(lin_item, EnterLoop): + current_inames.add(lin_item.iname) + elif isinstance(lin_item, LeaveLoop): + current_inames.remove(lin_item.iname) + elif isinstance(lin_item, Barrier): + loops_with_barriers[lin_item.synchronization_kind] |= current_inames # At this point we could technically skip ahead to next enterloop # }}} @@ -306,7 +302,7 @@ def generate_pairwise_schedules( # (Could try to combine this with pass below but would make things messy) iname_bounds_pwaff = {} - for iname in loops_with_lbarriers: + for iname in loops_with_barriers["local"] | loops_with_barriers["global"]: # Get first and last vals for this iname bounds = knl.get_iname_bounds(iname) iname_bounds_pwaff[iname] = ( @@ -314,22 +310,279 @@ def generate_pairwise_schedules( # }}} - # {{{ Construct blueprint for creating blex space and orderings - # TODO combine this pass over the linearization items 
with the pass above + def _collect_blex_ordering_info(sync_kind): + + # {{{ Construct blueprint for creating blex space and orderings + # TODO combine this pass over the linearization items with the pass above + + stmt_inst_to_blex = {} # map stmt instances to blex space + iname_to_blex_dim = {} # map from inames to corresponding blex space dim + blex_exclusion_info = {} # info for creating pairs to subtract from blex order + blex_map_params = set() # params needed in blex map + n_blex_dims = 1 # number of dims in blex space + next_blex_pt = [0] # next tuple of points in blex order + + for lin_item in lin_items: + if isinstance(lin_item, EnterLoop): + enter_iname = lin_item.iname + if enter_iname in loops_with_barriers[sync_kind]: + # update next blex pt + pre_loop_blex_pt = next_blex_pt[:] + next_blex_pt[-1] += 1 + next_blex_pt.append(enter_iname) + next_blex_pt.append(0) + + # store tuples that will be used to create pairs + # that will later be subtracted from happens-before map + lbound = iname_bounds_pwaff[enter_iname][0] + first_iter_blex_pt = next_blex_pt[:] + first_iter_blex_pt[-2] = lbound + blex_exclusion_info[enter_iname] = { + PRE: tuple(pre_loop_blex_pt), # make sure to copy + TOP: tuple(next_blex_pt), # make sure to copy + FIRST: tuple(first_iter_blex_pt), # make sure to copy + } + blex_map_params |= set(lbound.get_var_names(dt.param)) + + elif isinstance(lin_item, LeaveLoop): + leave_iname = lin_item.iname + if leave_iname in loops_with_barriers[sync_kind]: + + # update max blex dims + n_blex_dims = max(n_blex_dims, len(next_blex_pt)) + iname_to_blex_dim[leave_iname] = len(next_blex_pt)-2 + + # update next blex pt + pre_end_loop_blex_pt = next_blex_pt[:] + next_blex_pt.pop() + next_blex_pt.pop() + next_blex_pt[-1] += 1 + + # store tuples that will be used to create pairs + # that will later be subtracted from happens-before map + ubound = iname_bounds_pwaff[leave_iname][1] + last_iter_blex_pt = pre_end_loop_blex_pt[:] + last_iter_blex_pt[-2] = ubound 
+ blex_exclusion_info[leave_iname][BOTTOM] = tuple( + pre_end_loop_blex_pt) + blex_exclusion_info[leave_iname][LAST] = tuple(last_iter_blex_pt) + blex_exclusion_info[leave_iname][POST] = tuple(next_blex_pt) + # (make sure ^these are copies) + blex_map_params |= set(ubound.get_var_names(dt.param)) + + elif isinstance(lin_item, RunInstruction): + # Add item to stmt_inst_to_blex + stmt_inst_to_blex[lin_item.insn_id] = tuple(next_blex_pt) + # Don't increment blex dim val + + elif isinstance(lin_item, Barrier): + # Increment blex dim val + next_blex_pt[-1] += 1 + + else: + from loopy.schedule import (CallKernel, ReturnFromKernel) + # No action needed for these types of linearization item + assert isinstance( + lin_item, (CallKernel, ReturnFromKernel)) + pass + + blex_map_params = sorted(blex_map_params) + + # At this point, some blex tuples may have more dimensions than others; + # the missing dims are the fastest-updating dims, and their values should + # be zero. Add them. + for stmt, tup in stmt_inst_to_blex.items(): + stmt_inst_to_blex[stmt] = _pad_tuple_with_zeros(tup, n_blex_dims) + + # }}} + + # Create names for the blex dimensions for sequential loops + from loopy.schedule.checker.utils import ( + append_marker_to_strings, + ) + seq_blex_dim_names = [ + BLEX_VAR_PREFIX+str(i) for i in range(n_blex_dims)] + seq_blex_dim_names_prime = append_marker_to_strings( + seq_blex_dim_names, marker=BEFORE_MARK) + + blex_order_map = create_lex_order_map( + before_names=seq_blex_dim_names_prime, + after_names=seq_blex_dim_names, + after_names_concurrent=conc_lex_dim_names, + conc_var_comparison_op="ne", + in_dim_marker=BEFORE_MARK, + ) + + iname_to_blex_var = {} + for iname, dim in iname_to_blex_dim.items(): + iname_to_blex_var[iname] = seq_blex_dim_names[dim] + iname_to_blex_var[iname+BEFORE_MARK] = seq_blex_dim_names_prime[dim] + + # Add params to blex map + blex_order_map = blex_order_map.add_dims(dt.param, len(blex_map_params)) + for i, p in enumerate(blex_map_params): + 
blex_order_map = blex_order_map.set_dim_name(dt.param, i, p) + + # get a set representing blex_order_map space + blex_set_template = isl.align_spaces( + isl.Map("[ ] -> { [ ] -> [ ] }"), blex_order_map + ).move_dims( + dt.in_, n_blex_dims, dt.out, 0, n_blex_dims + ).domain() + blex_set_affs = isl.affs_from_space(blex_set_template.space) + + def _create_subtraction_map_for_iname(iname, blueprint): + # Note: blueprint[FIRST] and blueprint[LAST] contain pwaffs + + def _create_blex_set_from_tuple_pair(before, after, wrap_cond=False): + + # start with a set representing blex_order_map space + blex_set = blex_set_template.copy() + + # add markers to inames in before tuple + # (assume strings are the inames) + before_prime = tuple( + v+BEFORE_MARK if isinstance(v, str) else v for v in before) + before_padded = _pad_tuple_with_zeros(before_prime, n_blex_dims) + after_padded = _pad_tuple_with_zeros(after, n_blex_dims) + + # assign vals to dims + for dim_name, dim_val in zip( + seq_blex_dim_names_prime+seq_blex_dim_names, + before_padded+after_padded): + # (could exploit knowledge of content types of odd/even + # tuple dims to reduce conditionals but would be ugly + # and less robust) + if isinstance(dim_val, int): + # set idx to int val + blex_set &= blex_set_affs[dim_name].eq_set( + blex_set_affs[0]+dim_val) + elif isinstance(dim_val, str): + # assume this is an iname, set idx to corresponding blex var + blex_set &= blex_set_affs[dim_name].eq_set( + blex_set_affs[iname_to_blex_var[dim_val]]) + else: + assert isinstance(dim_val, isl.PwAff) + pwaff_aligned = isl.align_spaces(dim_val, blex_set_affs[0]) + # (doesn't matter which element of blex_set_affs we use^) + blex_set &= blex_set_affs[dim_name].eq_set(pwaff_aligned) + + if wrap_cond: + # i = i' + step + # TODO what about step sizes != 1? 
+ blex_set &= blex_set_affs[iname_to_blex_var[iname]].eq_set( + blex_set_affs[iname_to_blex_var[iname+BEFORE_MARK]] + 1) + + return blex_set + + # enter loop case + full_blex_set = _create_blex_set_from_tuple_pair( + blueprint[PRE], blueprint[FIRST]) + # wrap loop case + full_blex_set |= _create_blex_set_from_tuple_pair( + blueprint[BOTTOM], blueprint[TOP], wrap_cond=True) + # leave loop case + full_blex_set |= _create_blex_set_from_tuple_pair( + blueprint[LAST], blueprint[POST]) + + # add cond to fix iteration value for surrounding loops (i = i') + for surrounding_iname in blueprint[PRE][1::2]: + s_blex_var = iname_to_blex_var[surrounding_iname] + full_blex_set &= blex_set_affs[s_blex_var].eq_set( + blex_set_affs[s_blex_var+BEFORE_MARK]) + + # convert blex set back to map + return isl.Map.from_domain(full_blex_set).move_dims( + dt.out, 0, dt.in_, n_blex_dims, n_blex_dims) + + # subtract unwanted pairs from happens-before blex map + maps_to_subtract = [] + for iname, subdict in blex_exclusion_info.items(): + maps_to_subtract.append(_create_subtraction_map_for_iname(iname, subdict)) + + if maps_to_subtract: + # get union of maps + map_to_subtract = maps_to_subtract[0] + for other_map in maps_to_subtract[1:]: + map_to_subtract |= other_map + + # get some closure + map_to_subtract, closure_exact = map_to_subtract.transitive_closure() + assert closure_exact # TODO warn instead + + # subtract from blex order map + blex_order_map = blex_order_map - map_to_subtract + + return ( + stmt_inst_to_blex, # map stmt instances to blex space + blex_order_map, + seq_blex_dim_names, + ) + + # {{{ combining local and global stuff in single pass (old, TODO remove?) 
+ """ + GLOBAL = "global" + LOCAL = "local" + stmt_inst_to_blex = {LOCAL: {}, GLOBAL: {}} # map stmt instances to blex space + iname_to_blex_dim = {LOCAL: {}, GLOBAL: {}} # map from inames to corresponding blex space dim + blex_exclusion_info = {LOCAL: {}, GLOBAL: {}} # info for creating pairs to subtract from blex order + blex_map_params = {LOCAL: set(), GLOBAL: set()} # params needed in blex map + next_blex_pt = {LOCAL: [0], GLOBAL: [0]} # next tuple of points in blex order + n_blex_dims = {LOCAL: 1, GLOBAL: 1} # number of dims in blex space + + def _enter_loop_blex_processing(scope, enter_iname): + # scope is either LOCAL or GLOBAL + + pre_loop_blex_pt = next_blex_pt[scope][:] + next_blex_pt[scope][-1] += 1 + next_blex_pt[scope].append(enter_iname) + next_blex_pt[scope].append(0) + + # store tuples that will be used to create pairs + # that will later be subtracted from happens-before map + lbound = iname_bounds_pwaff[enter_iname][0] + first_iter_blex_pt = next_blex_pt[scope][:] + first_iter_blex_pt[-2] = lbound + blex_exclusion_info[scope][enter_iname] = { + PRE: tuple(pre_loop_blex_pt), # make sure to copy + TOP: tuple(next_blex_pt[scope]), # make sure to copy + FIRST: tuple(first_iter_blex_pt), # make sure to copy + } + blex_map_params[scope] |= set(lbound.get_var_names(dt.param)) + + def _leave_loop_blex_processing(scope, leave_iname): + # scope is either LOCAL or GLOBAL + + # update max blex dims + n_blex_dims[scope] = max(n_blex_dims[scope], len(next_blex_pt[scope])) + iname_to_blex_dim[scope][leave_iname] = len(next_blex_pt[scope])-2 + + # update next blex pt + pre_end_loop_blex_pt = next_blex_pt[scope][:] + next_blex_pt[scope].pop() + next_blex_pt[scope].pop() + next_blex_pt[scope][-1] += 1 + + # store tuples that will be used to create pairs + # that will later be subtracted from happens-before map + ubound = iname_bounds_pwaff[leave_iname][1] + last_iter_blex_pt = pre_end_loop_blex_pt[:] + last_iter_blex_pt[-2] = ubound + 
blex_exclusion_info[scope][leave_iname][BOTTOM] = tuple( + pre_end_loop_blex_pt) + blex_exclusion_info[scope][leave_iname][LAST] = tuple(last_iter_blex_pt) + blex_exclusion_info[scope][leave_iname][POST] = tuple(next_blex_pt[scope]) + # (make sure ^these are copies) + blex_map_params[scope] |= set(ubound.get_var_names(dt.param)) - stmt_inst_to_lblex = {} # map stmt instances to lblex space - iname_to_lblex_dim = {} # map from inames to corresponding lblex space dim - lblex_exclusion_info = {} # info for creating pairs to subtract from lblex order - lblex_map_params = set() # params needed in lblex map - next_lblex_pt = [0] # next tuple of points in lblex order - n_lblex_dims = 1 # number of dims in lblex space # do both lblex and gblex processing in single pass through insns - for linearization_item in linearization_items: - if isinstance(linearization_item, EnterLoop): - enter_iname = linearization_item.iname + for lin_item in lin_items: + if isinstance(lin_item, EnterLoop): + enter_iname = lin_item.iname if enter_iname in loops_with_lbarriers: - # update next blex pt + _enter_loop_blex_processing(LOCAL, enter_iname) + # update next blex pt pre_loop_lblex_pt = next_lblex_pt[:] next_lblex_pt[-1] += 1 next_lblex_pt.append(enter_iname) @@ -346,10 +599,14 @@ def generate_pairwise_schedules( FIRST: tuple(first_iter_lblex_pt), # make sure to copy } lblex_map_params |= set(lbound.get_var_names(dt.param)) + if enter_iname in loops_with_gbarriers: + _enter_loop_blex_processing(GLOBAL, enter_iname) - elif isinstance(linearization_item, LeaveLoop): - leave_iname = linearization_item.iname + elif isinstance(lin_item, LeaveLoop): + leave_iname = lin_item.iname if leave_iname in loops_with_lbarriers: + _leave_loop_blex_processing(LOCAL, leave_iname) + # update max blex dims n_lblex_dims = max(n_lblex_dims, len(next_lblex_pt)) iname_to_lblex_dim[leave_iname] = len(next_lblex_pt)-2 @@ -371,15 +628,20 @@ def generate_pairwise_schedules( lblex_exclusion_info[leave_iname][POST] = 
tuple(next_lblex_pt) # (make sure ^these are copies) lblex_map_params |= set(ubound.get_var_names(dt.param)) + if leave_iname in loops_with_gbarriers: + _leave_loop_blex_processing(GLOBAL, leave_iname) - elif isinstance(linearization_item, RunInstruction): + elif isinstance(lin_item, RunInstruction): # Add item to stmt_inst_to_lblex - lp_insn_id = linearization_item.insn_id + lp_insn_id = lin_item.insn_id + stmt_inst_to_blex[LOCAL][lp_insn_id] = tuple(next_blex_pt[LOCAL]) + stmt_inst_to_blex[GLOBAL][lp_insn_id] = tuple(next_blex_pt[GLOBAL]) + stmt_inst_to_lblex[lp_insn_id] = tuple(next_lblex_pt) # Don't increment blex dim val - elif isinstance(linearization_item, Barrier): + elif isinstance(lin_item, Barrier): next_lblex_pt[-1] += 1 @@ -387,133 +649,15 @@ def generate_pairwise_schedules( from loopy.schedule import (CallKernel, ReturnFromKernel) # No action needed for these types of linearization item assert isinstance( - linearization_item, (CallKernel, ReturnFromKernel)) + lin_item, (CallKernel, ReturnFromKernel)) pass lblex_map_params = sorted(lblex_map_params) - + """ # }}} - # pad tuples w/zeros - for stmt, tup in stmt_inst_to_lblex.items(): - stmt_inst_to_lblex[stmt] = _pad_tuple_with_zeros(tup, n_lblex_dims) - - # Create names for the blex dimensions for sequential loops - from loopy.schedule.checker.utils import ( - append_marker_to_strings, - ) - seq_lblex_dim_names = [ - BLEX_VAR_PREFIX+str(i) for i in range(n_lblex_dims)] - seq_lblex_dim_names_prime = append_marker_to_strings( - seq_lblex_dim_names, marker=BEFORE_MARK) - - lblex_order_map = create_lex_order_map( - before_names=seq_lblex_dim_names_prime, - after_names=seq_lblex_dim_names, - after_names_concurrent=conc_lex_dim_names, - conc_var_comparison_op="ne", - in_dim_marker=BEFORE_MARK, - ) - - iname_to_lblex_var = {} - for iname, dim in iname_to_lblex_dim.items(): - iname_to_lblex_var[iname] = seq_lblex_dim_names[dim] - iname_to_lblex_var[iname+BEFORE_MARK] = seq_lblex_dim_names_prime[dim] - - # Add 
params to blex map - lblex_order_map = lblex_order_map.add_dims(dt.param, len(lblex_map_params)) - for i, p in enumerate(lblex_map_params): - lblex_order_map = lblex_order_map.set_dim_name(dt.param, i, p) - - # get a set representing blex_order_map space - lblex_set_template = isl.align_spaces( - isl.Map("[ ] -> { [ ] -> [ ] }"), lblex_order_map - ).move_dims( - dt.in_, n_lblex_dims, dt.out, 0, n_lblex_dims - ).domain() - lblex_set_affs = isl.affs_from_space(lblex_set_template.space) - - def _create_subtraction_map_for_iname(iname, blueprint): - # Note: blueprint[FIRST] and blueprint[LAST] contain pwaffs - - def _create_blex_set_from_tuple_pair(before, after, wrap_cond=False): - - # start with a set representing blex_order_map space - lblex_set = lblex_set_template.copy() - - # add markers to inames in before tuple - # (assume strings are the inames) - before_prime = tuple( - v+BEFORE_MARK if isinstance(v, str) else v for v in before) - before_padded = _pad_tuple_with_zeros(before_prime, n_lblex_dims) - after_padded = _pad_tuple_with_zeros(after, n_lblex_dims) - - # assign vals to dims - for dim_name, dim_val in zip( - seq_lblex_dim_names_prime+seq_lblex_dim_names, - before_padded+after_padded): - # (could exploit knowledge of content types of odd/even - # tuple dims to reduce conditionals but would be ugly - # and less robust) - if isinstance(dim_val, int): - # set idx to int val - lblex_set &= lblex_set_affs[dim_name].eq_set( - lblex_set_affs[0]+dim_val) - elif isinstance(dim_val, str): - # assume this is an iname, set idx to corresponding blex var - lblex_set &= lblex_set_affs[dim_name].eq_set( - lblex_set_affs[iname_to_lblex_var[dim_val]]) - else: - assert isinstance(dim_val, isl.PwAff) - pwaff_aligned = isl.align_spaces(dim_val, lblex_set_affs[0]) - # (doesn't matter which element of lblex_set_affs we use^) - lblex_set &= lblex_set_affs[dim_name].eq_set(pwaff_aligned) - - if wrap_cond: - # i = i' + step - # TODO what about step sizes != 1? 
- lblex_set &= lblex_set_affs[iname_to_lblex_var[iname]].eq_set( - lblex_set_affs[iname_to_lblex_var[iname+BEFORE_MARK]] + 1) - - return lblex_set - - # enter loop case - full_lblex_set = _create_blex_set_from_tuple_pair( - blueprint[PRE], blueprint[FIRST]) - # wrap loop case - full_lblex_set |= _create_blex_set_from_tuple_pair( - blueprint[BOTTOM], blueprint[TOP], wrap_cond=True) - # leave loop case - full_lblex_set |= _create_blex_set_from_tuple_pair( - blueprint[LAST], blueprint[POST]) - - # add cond to fix iteration value for surrounding loops (i = i') - for surrounding_iname in blueprint[PRE][1::2]: - s_lblex_var = iname_to_lblex_var[surrounding_iname] - full_lblex_set &= lblex_set_affs[s_lblex_var].eq_set( - lblex_set_affs[s_lblex_var+BEFORE_MARK]) - - # convert blex set back to map - return isl.Map.from_domain(full_lblex_set).move_dims( - dt.out, 0, dt.in_, n_lblex_dims, n_lblex_dims) - - # subtract unwanted pairs from happens-before blex map - maps_to_subtract = [] - for iname, subdict in lblex_exclusion_info.items(): - maps_to_subtract.append(_create_subtraction_map_for_iname(iname, subdict)) - - if maps_to_subtract: - # get union of maps - map_to_subtract = maps_to_subtract[0] - for other_map in maps_to_subtract[1:]: - map_to_subtract |= other_map - - # get some closure - map_to_subtract, closure_exact = map_to_subtract.transitive_closure() - assert closure_exact # TODO warn instead - - # subtract from blex order map - lblex_order_map = lblex_order_map - map_to_subtract + stmt_inst_to_lblex, lblex_order_map, seq_lblex_dim_names = _collect_blex_ordering_info("local") + stmt_inst_to_gblex, gblex_order_map, seq_gblex_dim_names = _collect_blex_ordering_info("global") # }}} end blex order/map machinery @@ -635,21 +779,9 @@ def _get_map_for_stmt( # TODO finish separating lid stuff from gid stuff - # NOTE: use *unsimplified* lex tuples with blex map - - lblex_tuples = [stmt_inst_to_lblex[insn_id] for insn_id in insn_ids] - - # At this point, one of the lex 
tuples may have more dimensions than another; - # the missing dims are the fastest-updating dims, and their values should - # be zero. Add them. - max_lblex_dims = max([len(lblex_tuple) for lblex_tuple in lblex_tuples]) - lblex_tuples_padded = [ - _pad_tuple_with_zeros(lblex_tuple, max_lblex_dims) - for lblex_tuple in lblex_tuples] + # NOTE: use *unsimplified* lex tuples with blex map, which have already been padded - # Create names for the output dimensions for sequential loops - seq_lblex_dim_names = [ - BLEX_VAR_PREFIX+str(i) for i in range(len(lblex_tuples_padded[0]))] + lblex_tuples_padded = [stmt_inst_to_lblex[insn_id] for insn_id in insn_ids] lconc_sched_maps = [ _get_map_for_stmt( @@ -666,13 +798,25 @@ def _get_map_for_stmt( before_marker=BEFORE_MARK, ) + # TODO use func to avoid duplicated code here: + + gblex_tuples_padded = [stmt_inst_to_gblex[insn_id] for insn_id in insn_ids] + + gconc_sched_maps = [ + _get_map_for_stmt( + insn_id, gblex_tuple, int_sid, + seq_gblex_dim_names+conc_lex_dim_names) # conc names same for all + for insn_id, gblex_tuple, int_sid + in zip(insn_ids, gblex_tuples_padded, int_sids) + ] + # Create statement instance ordering - # TODO - #sio_gconc = get_statement_ordering_map( - # *gconc_sched_maps, # note, func accepts exactly two maps - # gblex_order_map, - # before_marker=BEFORE_MARK, - # ) + sio_gconc = get_statement_ordering_map( + *gconc_sched_maps, # note, func accepts exactly two maps + gblex_order_map, + before_marker=BEFORE_MARK, + ) + # }}} @@ -680,7 +824,8 @@ def _get_map_for_stmt( #pairwise_schedules[tuple(insn_ids)] = tuple(intra_thread_sched_maps) pairwise_schedules[tuple(insn_ids)] = ( (sio_seq, tuple(intra_thread_sched_maps), ), - (sio_lconc, tuple(lconc_sched_maps), ) + (sio_lconc, tuple(lconc_sched_maps), ), + (sio_gconc, tuple(gconc_sched_maps), ), ) return pairwise_schedules diff --git a/test/test_linearization_checker.py b/test/test_linearization_checker.py index 385f83b15..426f15f47 100644 --- 
a/test/test_linearization_checker.py +++ b/test/test_linearization_checker.py @@ -142,6 +142,8 @@ def test_pairwise_schedule_creation(): sio_seq, (sched_before, sched_after) ), ( sio_lconc, (lconc_sched_before, lconc_sched_after) + ), ( + sio_gconc, (gconc_sched_before, gconc_sched_after) ) = scheds[ ("insn_a", "insn_b")] @@ -176,6 +178,8 @@ def test_pairwise_schedule_creation(): sio_seq, (sched_before, sched_after) ), ( sio_lconc, (lconc_sched_before, lconc_sched_after) + ), ( + sio_gconc, (gconc_sched_before, gconc_sched_after) ) = scheds[ ("insn_a", "insn_c")] @@ -210,6 +214,8 @@ def test_pairwise_schedule_creation(): sio_seq, (sched_before, sched_after) ), ( sio_lconc, (lconc_sched_before, lconc_sched_after) + ), ( + sio_gconc, (gconc_sched_before, gconc_sched_after) ) = scheds[ ("insn_a", "insn_d")] @@ -244,6 +250,8 @@ def test_pairwise_schedule_creation(): sio_seq, (sched_before, sched_after) ), ( sio_lconc, (lconc_sched_before, lconc_sched_after) + ), ( + sio_gconc, (gconc_sched_before, gconc_sched_after) ) = scheds[ ("insn_b", "insn_c")] @@ -278,6 +286,8 @@ def test_pairwise_schedule_creation(): sio_seq, (sched_before, sched_after) ), ( sio_lconc, (lconc_sched_before, lconc_sched_after) + ), ( + sio_gconc, (gconc_sched_before, gconc_sched_after) ) = scheds[ ("insn_b", "insn_d")] @@ -312,6 +322,8 @@ def test_pairwise_schedule_creation(): sio_seq, (sched_before, sched_after) ), ( sio_lconc, (lconc_sched_before, lconc_sched_after) + ), ( + sio_gconc, (gconc_sched_before, gconc_sched_after) ) = scheds[ ("insn_c", "insn_d")] @@ -391,6 +403,8 @@ def test_pairwise_schedule_creation_with_hw_par_tags(): sio_seq, (sched_before, sched_after) ), ( sio_lconc, (lconc_sched_before, lconc_sched_after) + ), ( + sio_gconc, (gconc_sched_before, gconc_sched_after) ) = scheds[ ("stmt_a", "stmt_b")] @@ -531,6 +545,8 @@ def _check_sio_for_stmt_pair( sio_seq, (sched_before, sched_after) ), ( sio_lconc, (lconc_sched_before, lconc_sched_after) + ), ( + sio_gconc, 
(gconc_sched_before, gconc_sched_after) ) = scheds[ (stmt_id_before, stmt_id_after)] From 59a829364134673226855e09350a985390c93c09 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Tue, 23 Mar 2021 19:29:24 -0500 Subject: [PATCH 191/460] create helper functions add_and_name_isl_dims(), add_eq_isl_constraint_from_names(), add_ne_isl_constraint_from_names() --- loopy/schedule/checker/utils.py | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/loopy/schedule/checker/utils.py b/loopy/schedule/checker/utils.py index 39c7f48e0..a6636c41c 100644 --- a/loopy/schedule/checker/utils.py +++ b/loopy/schedule/checker/utils.py @@ -35,6 +35,14 @@ def add_dims_to_isl_set(isl_set, dim_type, names, new_idx_start): return new_set +def add_and_name_isl_dims(isl_map, dim_type, names): + new_idx_start = isl_map.dim(dim_type) + new_map = isl_map.add_dims(dim_type, len(names)) + for i, name in enumerate(names): + new_map = new_map.set_dim_name(dim_type, new_idx_start+i, name) + return new_map + + def reorder_dims_by_name( isl_set, dim_type, desired_dims_ordered): """Return an isl_set with the dimensions of the specified dim_type @@ -85,6 +93,23 @@ def ensure_dim_names_match_and_align(obj_map, tgt_map): return isl.align_spaces(obj_map, tgt_map) +def add_eq_isl_constraint_from_names(isl_map, var1, var2): + # add constraint var1 = var2 + return isl_map.add_constraint( + isl.Constraint.eq_from_names( + isl_map.space, + {1: 0, var1: 1, var2: -1})) + + +def add_ne_isl_constraint_from_names(isl_map, var1, var2): + # add constraint var1 != var2 + return isl_map.add_constraint( + isl.Constraint.ineq_from_names(isl_map.space, {1: -1, var1: 1, var2: -1}) + ) | isl_map.add_constraint( + isl.Constraint.ineq_from_names(isl_map.space, {1: -1, var2: 1, var1: -1}) + ) + + def append_marker_to_isl_map_var_names(old_isl_map, dim_type, marker="'"): """Return an :class:`islpy.Map` with a marker appended to the specified dimension names. 
From d28a031b1fcb060f3a0b33651789ef6d29bed4d7 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Tue, 23 Mar 2021 19:30:43 -0500 Subject: [PATCH 192/460] don't try to deal with appending conc dims inside get_lex_order_set/map; instead add them after creating the traditional ordering using the existing functions --- .../checker/lexicographic_order_map.py | 55 +++++++------------ loopy/schedule/checker/schedule.py | 45 ++++++++++----- test/test_linearization_checker.py | 42 +------------- 3 files changed, 54 insertions(+), 88 deletions(-) diff --git a/loopy/schedule/checker/lexicographic_order_map.py b/loopy/schedule/checker/lexicographic_order_map.py index 9add041c4..7927812b5 100644 --- a/loopy/schedule/checker/lexicographic_order_map.py +++ b/loopy/schedule/checker/lexicographic_order_map.py @@ -72,11 +72,9 @@ def get_statement_ordering_map( def get_lex_order_set( - before_names, after_names, - before_names_concurrent=[], - after_names_concurrent=[], + dim_names, islvars=None, - conc_var_comparison_op="eq", + in_dim_marker="'", ): """Return an :class:`islpy.Set` representing a lexicographic ordering with the number of dimensions provided in `before_names` @@ -118,55 +116,48 @@ def get_lex_order_set( # TODO update doc from loopy.schedule.checker.utils import ( - create_elementwise_comparison_conjunction_set, + append_marker_to_strings, ) + in_dim_names = append_marker_to_strings(dim_names, marker=in_dim_marker) + # If no islvars passed, make them using the names provided # (make sure to pass var names in desired order of space dims) if islvars is None: islvars = isl.make_zero_and_vars( - before_names+before_names_concurrent+after_names+after_names_concurrent, + in_dim_names+dim_names, []) # Initialize set with constraint i0' < i0 - lex_order_set = islvars[before_names[0]].lt_set(islvars[after_names[0]]) + lex_order_set = islvars[in_dim_names[0]].lt_set(islvars[dim_names[0]]) # For each dim d, starting with d=1, equality_conj_set will be constrained # by d equalities, 
e.g., (i0' = i0 and i1' = i1 and ... i(d-1)' = i(d-1)). equality_conj_set = islvars[0].eq_set(islvars[0]) # initialize to 'true' - for i in range(1, len(before_names)): + for i in range(1, len(in_dim_names)): # Add the next equality constraint to equality_conj_set equality_conj_set = equality_conj_set & \ - islvars[before_names[i-1]].eq_set(islvars[after_names[i-1]]) + islvars[in_dim_names[i-1]].eq_set(islvars[dim_names[i-1]]) # Create a set constrained by adding a less-than constraint for this dim, # e.g., (i1' < i1), to the current equality conjunction set. # For each dim d, starting with d=1, this full conjunction will have # d equalities and one inequality, e.g., # (i0' = i0 and i1' = i1 and ... i(d-1)' = i(d-1) and id' < id) - full_conj_set = islvars[before_names[i]].lt_set( - islvars[after_names[i]]) & equality_conj_set + full_conj_set = islvars[in_dim_names[i]].lt_set( + islvars[dim_names[i]]) & equality_conj_set # Union this new constraint with the current lex_order_set lex_order_set = lex_order_set | full_conj_set - lex_order_set = lex_order_set & \ - create_elementwise_comparison_conjunction_set( - before_names_concurrent, after_names_concurrent, - islvars, op=conc_var_comparison_op, - ) - return lex_order_set def create_lex_order_map( n_dims=None, - before_names=None, - after_names=None, - after_names_concurrent=[], - conc_var_comparison_op="eq", + dim_names=None, in_dim_marker="'", ): """Return a map from each point in a lexicographic ordering to every @@ -199,30 +190,22 @@ def create_lex_order_map( """ # TODO update doc - from loopy.schedule.checker.utils import append_marker_to_strings - - if after_names is None: - after_names = ["i%s" % (i) for i in range(n_dims)] - if before_names is None: - before_names = append_marker_to_strings(after_names, marker=in_dim_marker) + if dim_names is None: + dim_names = ["i%s" % (i) for i in range(n_dims)] if n_dims is None: - n_dims = len(after_names) - before_names_concurrent = append_marker_to_strings( - 
after_names_concurrent, marker=in_dim_marker) + n_dims = len(dim_names) - assert len(before_names) == len(after_names) == n_dims + assert len(dim_names) == n_dims dim_type = isl.dim_type # First, get a set representing the lexicographic ordering. lex_order_set = get_lex_order_set( - before_names, after_names, - before_names_concurrent, after_names_concurrent, - conc_var_comparison_op=conc_var_comparison_op, + dim_names, + in_dim_marker=in_dim_marker, ) # Now convert that set to a map. lex_map = isl.Map.from_domain(lex_order_set) return lex_map.move_dims( dim_type.out, 0, dim_type.in_, - len(before_names) + len(before_names_concurrent), - len(after_names) + len(after_names_concurrent)) + n_dims, n_dims) diff --git a/loopy/schedule/checker/schedule.py b/loopy/schedule/checker/schedule.py index 6212b5e44..65f27f742 100644 --- a/loopy/schedule/checker/schedule.py +++ b/loopy/schedule/checker/schedule.py @@ -165,6 +165,12 @@ def generate_pairwise_schedules( create_lex_order_map, get_statement_ordering_map, ) + from loopy.schedule.checker.utils import ( + add_and_name_isl_dims, + append_marker_to_strings, + add_eq_isl_constraint_from_names, + add_ne_isl_constraint_from_names, + ) all_insn_ids = set().union(*insn_id_pairs) @@ -397,31 +403,36 @@ def _collect_blex_ordering_info(sync_kind): # }}} # Create names for the blex dimensions for sequential loops - from loopy.schedule.checker.utils import ( - append_marker_to_strings, - ) seq_blex_dim_names = [ BLEX_VAR_PREFIX+str(i) for i in range(n_blex_dims)] seq_blex_dim_names_prime = append_marker_to_strings( seq_blex_dim_names, marker=BEFORE_MARK) blex_order_map = create_lex_order_map( - before_names=seq_blex_dim_names_prime, - after_names=seq_blex_dim_names, - after_names_concurrent=conc_lex_dim_names, - conc_var_comparison_op="ne", + dim_names=seq_blex_dim_names, in_dim_marker=BEFORE_MARK, ) + # Add lid/gid dims to lex order map + blex_order_map = add_and_name_isl_dims( + blex_order_map, dt.out, conc_lex_dim_names) + 
blex_order_map = add_and_name_isl_dims( + blex_order_map, dt.in_, append_marker_to_strings(conc_lex_dim_names)) + # Constrain lid/gid vars to be *not* equal + # TODO do right thing with conc vars for lblex, gblex case + # TODO LEFT OFF HERE + for var_name in conc_lex_dim_names: + blex_order_map = add_ne_isl_constraint_from_names( + blex_order_map, var_name, var_name+BEFORE_MARK) + iname_to_blex_var = {} for iname, dim in iname_to_blex_dim.items(): iname_to_blex_var[iname] = seq_blex_dim_names[dim] iname_to_blex_var[iname+BEFORE_MARK] = seq_blex_dim_names_prime[dim] # Add params to blex map - blex_order_map = blex_order_map.add_dims(dt.param, len(blex_map_params)) - for i, p in enumerate(blex_map_params): - blex_order_map = blex_order_map.set_dim_name(dt.param, i, p) + blex_order_map = add_and_name_isl_dims( + blex_order_map, dt.param, blex_map_params) # get a set representing blex_order_map space blex_set_template = isl.align_spaces( @@ -759,12 +770,20 @@ def _get_map_for_stmt( # parallel dims are used. 
(could simplify everything by always using # all dims..., which would make maps more complex than necessary) lex_order_map = create_lex_order_map( - after_names=seq_lex_dim_names, - after_names_concurrent=conc_lex_dim_names, - conc_var_comparison_op="eq", + dim_names=seq_lex_dim_names, in_dim_marker=BEFORE_MARK, ) + # Add lid/gid dims to lex order map + lex_order_map = add_and_name_isl_dims( + lex_order_map, dt.out, conc_lex_dim_names) + lex_order_map = add_and_name_isl_dims( + lex_order_map, dt.in_, append_marker_to_strings(conc_lex_dim_names)) + # Constrain lid/gid vars to be equal + for var_name in conc_lex_dim_names: + lex_order_map = add_eq_isl_constraint_from_names( + lex_order_map, var_name, var_name+BEFORE_MARK) + # Create statement instance ordering, # maps each statement instance to all statement instances occuring later sio_seq = get_statement_ordering_map( diff --git a/test/test_linearization_checker.py b/test/test_linearization_checker.py index 426f15f47..c0a3e8f95 100644 --- a/test/test_linearization_checker.py +++ b/test/test_linearization_checker.py @@ -447,8 +447,7 @@ def test_lex_order_map_creation(): append_marker_to_isl_map_var_names, ) - def _check_lex_map( - exp_lex_order_map, n_dims, lid_axes_used=[], gid_axes_used=[]): + def _check_lex_map(exp_lex_order_map, n_dims): # Isl ignores the apostrophes, so explicitly add them exp_lex_order_map = append_marker_to_isl_map_var_names( @@ -456,20 +455,11 @@ def _check_lex_map( lex_order_map = create_lex_order_map( n_dims=n_dims, - before_names=["%s%d'" % (LEX_VAR_PREFIX, i) for i in range(n_dims)], - after_names=["%s%d" % (LEX_VAR_PREFIX, i) for i in range(n_dims)], - after_names_concurrent=[ - LTAG_VAR_NAMES[i] for i in lid_axes_used] + [ - GTAG_VAR_NAMES[i] for i in gid_axes_used], + dim_names=["%s%d" % (LEX_VAR_PREFIX, i) for i in range(n_dims)], ) assert lex_order_map == exp_lex_order_map - assert ( - lex_order_map.get_var_names(isl.dim_type.in_) == - 
exp_lex_order_map.get_var_names(isl.dim_type.in_)) - assert ( - lex_order_map.get_var_names(isl.dim_type.out) == - exp_lex_order_map.get_var_names(isl.dim_type.out)) + assert lex_order_map.get_var_dict() == exp_lex_order_map.get_var_dict() exp_lex_order_map = isl.Map( "{{ " @@ -499,32 +489,6 @@ def _check_lex_map( _check_lex_map(exp_lex_order_map, 1) - # Lex map for kernel with parallel HW tags - - lid_axes_used = [0, 1] - gid_axes_used = [0, 1, 2] - hw_par_lex_vars = [ - LTAG_VAR_NAMES[i] for i in lid_axes_used] + [ - GTAG_VAR_NAMES[i] for i in gid_axes_used] - exp_lex_order_map = isl.Map( - "{{ " - "[{0}0', {0}1', {0}2', {1}', {2}', {3}', {4}', {5}'] " - "-> [{0}0, {0}1, {0}2, {1}, {2}, {3}, {4}, {5}] :" - "((" - "{0}0' < {0}0 " - ") or (" - "{0}0'={0}0 and {0}1' < {0}1 " - ") or (" - "{0}0'={0}0 and {0}1'={0}1 and {0}2' < {0}2 " - ")) and (" - "{1}' = {1} and {2}' = {2} and {3}' = {3} and {4}' = {4} and {5}' = {5}" - ")" - "}}".format(LEX_VAR_PREFIX, *hw_par_lex_vars)) - - _check_lex_map( - exp_lex_order_map, 3, - lid_axes_used=lid_axes_used, gid_axes_used=gid_axes_used) - # }}} From 805cab45e134febabb3af2318a7c4edb873db501 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Tue, 23 Mar 2021 19:34:04 -0500 Subject: [PATCH 193/460] rename add_dims_to_isl_set()->insert_and_name_isl_dims() --- loopy/schedule/checker/schedule.py | 4 ++-- loopy/schedule/checker/utils.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/loopy/schedule/checker/schedule.py b/loopy/schedule/checker/schedule.py index 65f27f742..42fecac1e 100644 --- a/loopy/schedule/checker/schedule.py +++ b/loopy/schedule/checker/schedule.py @@ -677,7 +677,7 @@ def _leave_loop_blex_processing(scope, leave_iname): from loopy.schedule.checker.utils import ( sorted_union_of_names_in_isl_sets, create_symbolic_map_from_tuples, - add_dims_to_isl_set, + insert_and_name_isl_dims, ) def _get_map_for_stmt( @@ -702,7 +702,7 @@ def _get_map_for_stmt( # Insert 'statement' dim into domain so that its space 
allows # for intersection with sched map later - dom_to_intersect = add_dims_to_isl_set( + dom_to_intersect = insert_and_name_isl_dims( dom, dt.set, [STATEMENT_VAR_NAME], 0) # Each map will map statement instances -> lex time. diff --git a/loopy/schedule/checker/utils.py b/loopy/schedule/checker/utils.py index a6636c41c..d8ef1c771 100644 --- a/loopy/schedule/checker/utils.py +++ b/loopy/schedule/checker/utils.py @@ -28,7 +28,7 @@ def prettier_map_string(map_obj): ).replace("{ ", "{\n").replace(" }", "\n}").replace("; ", ";\n") -def add_dims_to_isl_set(isl_set, dim_type, names, new_idx_start): +def insert_and_name_isl_dims(isl_set, dim_type, names, new_idx_start): new_set = isl_set.insert_dims(dim_type, new_idx_start, len(names)) for i, name in enumerate(names): new_set = new_set.set_dim_name(dim_type, new_idx_start+i, name) From 98aef7c7ad28997141ce7398189c4a4760ae09c3 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Tue, 23 Mar 2021 19:52:20 -0500 Subject: [PATCH 194/460] for lblex map, constrain gids to be equal --- loopy/schedule/checker/schedule.py | 28 +++++++++++++++------------- loopy/schedule/checker/utils.py | 9 --------- 2 files changed, 15 insertions(+), 22 deletions(-) diff --git a/loopy/schedule/checker/schedule.py b/loopy/schedule/checker/schedule.py index 42fecac1e..a0ce65d34 100644 --- a/loopy/schedule/checker/schedule.py +++ b/loopy/schedule/checker/schedule.py @@ -169,7 +169,6 @@ def generate_pairwise_schedules( add_and_name_isl_dims, append_marker_to_strings, add_eq_isl_constraint_from_names, - add_ne_isl_constraint_from_names, ) all_insn_ids = set().union(*insn_id_pairs) @@ -264,15 +263,15 @@ def generate_pairwise_schedules( # Get dim names representing local/group axes for this kernel, # and get the dictionary that will be used later to create a # constraint requiring {par inames == par axes} in sched - l_axes_used = set() - g_axes_used = set() + lid_lex_dim_names = set() + gid_lex_dim_names = set() par_iname_constraint_dicts = [] for iname in 
knl.all_inames(): ltag = knl.iname_tags_of_type(iname, LocalIndexTag) if ltag: # assert len(ltag) == 1 # (should always be true) ltag_var = LTAG_VAR_NAMES[ltag.pop().axis] - l_axes_used.add(ltag_var) + lid_lex_dim_names.add(ltag_var) # Represent constraint 'iname = ltag_var' in par_iname_constraint_dicts: par_iname_constraint_dicts.append({1: 0, iname: 1, ltag_var: -1}) continue @@ -280,11 +279,12 @@ def generate_pairwise_schedules( if gtag: # assert len(gtag) == 1 # (should always be true) gtag_var = GTAG_VAR_NAMES[gtag.pop().axis] - g_axes_used.add(gtag_var) + gid_lex_dim_names.add(gtag_var) # Represent constraint 'iname = gtag_var' in par_iname_constraint_dicts: par_iname_constraint_dicts.append({1: 0, iname: 1, gtag_var: -1}) continue - conc_lex_dim_names = sorted(l_axes_used) + sorted(g_axes_used) + lid_lex_dim_names = sorted(lid_lex_dim_names) + gid_lex_dim_names = sorted(gid_lex_dim_names) # {{{ Create blex ordering (may later be combined with pass above) @@ -316,6 +316,8 @@ def generate_pairwise_schedules( # }}} + conc_lex_dim_names = lid_lex_dim_names + gid_lex_dim_names + def _collect_blex_ordering_info(sync_kind): # {{{ Construct blueprint for creating blex space and orderings @@ -418,12 +420,12 @@ def _collect_blex_ordering_info(sync_kind): blex_order_map, dt.out, conc_lex_dim_names) blex_order_map = add_and_name_isl_dims( blex_order_map, dt.in_, append_marker_to_strings(conc_lex_dim_names)) - # Constrain lid/gid vars to be *not* equal - # TODO do right thing with conc vars for lblex, gblex case - # TODO LEFT OFF HERE - for var_name in conc_lex_dim_names: - blex_order_map = add_ne_isl_constraint_from_names( - blex_order_map, var_name, var_name+BEFORE_MARK) + if sync_kind == "local": + # Constrain gid vars to be equal + for var_name in gid_lex_dim_names: + blex_order_map = add_eq_isl_constraint_from_names( + blex_order_map, var_name, var_name+BEFORE_MARK) + # (if sync_kind == "global", don't need constraints on lid/gid vars) iname_to_blex_var = {} for 
iname, dim in iname_to_blex_dim.items(): @@ -434,7 +436,7 @@ def _collect_blex_ordering_info(sync_kind): blex_order_map = add_and_name_isl_dims( blex_order_map, dt.param, blex_map_params) - # get a set representing blex_order_map space + # Get a set representing blex_order_map space blex_set_template = isl.align_spaces( isl.Map("[ ] -> { [ ] -> [ ] }"), blex_order_map ).move_dims( diff --git a/loopy/schedule/checker/utils.py b/loopy/schedule/checker/utils.py index d8ef1c771..c079e0a61 100644 --- a/loopy/schedule/checker/utils.py +++ b/loopy/schedule/checker/utils.py @@ -101,15 +101,6 @@ def add_eq_isl_constraint_from_names(isl_map, var1, var2): {1: 0, var1: 1, var2: -1})) -def add_ne_isl_constraint_from_names(isl_map, var1, var2): - # add constraint var1 != var2 - return isl_map.add_constraint( - isl.Constraint.ineq_from_names(isl_map.space, {1: -1, var1: 1, var2: -1}) - ) | isl_map.add_constraint( - isl.Constraint.ineq_from_names(isl_map.space, {1: -1, var2: 1, var1: -1}) - ) - - def append_marker_to_isl_map_var_names(old_isl_map, dim_type, marker="'"): """Return an :class:`islpy.Map` with a marker appended to the specified dimension names. 
From 54a8364694673f5aaaad3ea8ecfa9cf845ecd2cd Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Tue, 23 Mar 2021 20:06:19 -0500 Subject: [PATCH 195/460] minor cleanup --- loopy/schedule/checker/schedule.py | 161 +++-------------------------- 1 file changed, 13 insertions(+), 148 deletions(-) diff --git a/loopy/schedule/checker/schedule.py b/loopy/schedule/checker/schedule.py index a0ce65d34..ebf607c99 100644 --- a/loopy/schedule/checker/schedule.py +++ b/loopy/schedule/checker/schedule.py @@ -321,11 +321,10 @@ def generate_pairwise_schedules( def _collect_blex_ordering_info(sync_kind): # {{{ Construct blueprint for creating blex space and orderings - # TODO combine this pass over the linearization items with the pass above stmt_inst_to_blex = {} # map stmt instances to blex space iname_to_blex_dim = {} # map from inames to corresponding blex space dim - blex_exclusion_info = {} # info for creating pairs to subtract from blex order + blex_exclusion_info = {} # info for creating maps to exclude from blex order blex_map_params = set() # params needed in blex map n_blex_dims = 1 # number of dims in blex space next_blex_pt = [0] # next tuple of points in blex order @@ -334,7 +333,7 @@ def _collect_blex_ordering_info(sync_kind): if isinstance(lin_item, EnterLoop): enter_iname = lin_item.iname if enter_iname in loops_with_barriers[sync_kind]: - # update next blex pt + # update next blex pt pre_loop_blex_pt = next_blex_pt[:] next_blex_pt[-1] += 1 next_blex_pt.append(enter_iname) @@ -444,7 +443,7 @@ def _collect_blex_ordering_info(sync_kind): ).domain() blex_set_affs = isl.affs_from_space(blex_set_template.space) - def _create_subtraction_map_for_iname(iname, blueprint): + def _create_excluded_map_for_iname(iname, blueprint): # Note: blueprint[FIRST] and blueprint[LAST] contain pwaffs def _create_blex_set_from_tuple_pair(before, after, wrap_cond=False): @@ -511,7 +510,7 @@ def _create_blex_set_from_tuple_pair(before, after, wrap_cond=False): # subtract unwanted pairs from 
happens-before blex map maps_to_subtract = [] for iname, subdict in blex_exclusion_info.items(): - maps_to_subtract.append(_create_subtraction_map_for_iname(iname, subdict)) + maps_to_subtract.append(_create_excluded_map_for_iname(iname, subdict)) if maps_to_subtract: # get union of maps @@ -532,145 +531,12 @@ def _create_blex_set_from_tuple_pair(before, after, wrap_cond=False): seq_blex_dim_names, ) - # {{{ combining local and global stuff in single pass (old, TODO remove?) - """ - GLOBAL = "global" - LOCAL = "local" - stmt_inst_to_blex = {LOCAL: {}, GLOBAL: {}} # map stmt instances to blex space - iname_to_blex_dim = {LOCAL: {}, GLOBAL: {}} # map from inames to corresponding blex space dim - blex_exclusion_info = {LOCAL: {}, GLOBAL: {}} # info for creating pairs to subtract from blex order - blex_map_params = {LOCAL: set(), GLOBAL: set()} # params needed in blex map - next_blex_pt = {LOCAL: [0], GLOBAL: [0]} # next tuple of points in blex order - n_blex_dims = {LOCAL: 1, GLOBAL: 1} # number of dims in blex space - - def _enter_loop_blex_processing(scope, enter_iname): - # scope is either LOCAL or GLOBAL - - pre_loop_blex_pt = next_blex_pt[scope][:] - next_blex_pt[scope][-1] += 1 - next_blex_pt[scope].append(enter_iname) - next_blex_pt[scope].append(0) - - # store tuples that will be used to create pairs - # that will later be subtracted from happens-before map - lbound = iname_bounds_pwaff[enter_iname][0] - first_iter_blex_pt = next_blex_pt[scope][:] - first_iter_blex_pt[-2] = lbound - blex_exclusion_info[scope][enter_iname] = { - PRE: tuple(pre_loop_blex_pt), # make sure to copy - TOP: tuple(next_blex_pt[scope]), # make sure to copy - FIRST: tuple(first_iter_blex_pt), # make sure to copy - } - blex_map_params[scope] |= set(lbound.get_var_names(dt.param)) - - def _leave_loop_blex_processing(scope, leave_iname): - # scope is either LOCAL or GLOBAL - - # update max blex dims - n_blex_dims[scope] = max(n_blex_dims[scope], len(next_blex_pt[scope])) - 
iname_to_blex_dim[scope][leave_iname] = len(next_blex_pt[scope])-2 - - # update next blex pt - pre_end_loop_blex_pt = next_blex_pt[scope][:] - next_blex_pt[scope].pop() - next_blex_pt[scope].pop() - next_blex_pt[scope][-1] += 1 - - # store tuples that will be used to create pairs - # that will later be subtracted from happens-before map - ubound = iname_bounds_pwaff[leave_iname][1] - last_iter_blex_pt = pre_end_loop_blex_pt[:] - last_iter_blex_pt[-2] = ubound - blex_exclusion_info[scope][leave_iname][BOTTOM] = tuple( - pre_end_loop_blex_pt) - blex_exclusion_info[scope][leave_iname][LAST] = tuple(last_iter_blex_pt) - blex_exclusion_info[scope][leave_iname][POST] = tuple(next_blex_pt[scope]) - # (make sure ^these are copies) - blex_map_params[scope] |= set(ubound.get_var_names(dt.param)) - - - # do both lblex and gblex processing in single pass through insns - for lin_item in lin_items: - if isinstance(lin_item, EnterLoop): - enter_iname = lin_item.iname - if enter_iname in loops_with_lbarriers: - _enter_loop_blex_processing(LOCAL, enter_iname) - # update next blex pt - pre_loop_lblex_pt = next_lblex_pt[:] - next_lblex_pt[-1] += 1 - next_lblex_pt.append(enter_iname) - next_lblex_pt.append(0) - - # store tuples that will be used to create pairs - # that will later be subtracted from happens-before map - lbound = iname_bounds_pwaff[enter_iname][0] - first_iter_lblex_pt = next_lblex_pt[:] - first_iter_lblex_pt[-2] = lbound - lblex_exclusion_info[enter_iname] = { - PRE: tuple(pre_loop_lblex_pt), # make sure to copy - TOP: tuple(next_lblex_pt), # make sure to copy - FIRST: tuple(first_iter_lblex_pt), # make sure to copy - } - lblex_map_params |= set(lbound.get_var_names(dt.param)) - if enter_iname in loops_with_gbarriers: - _enter_loop_blex_processing(GLOBAL, enter_iname) - - elif isinstance(lin_item, LeaveLoop): - leave_iname = lin_item.iname - if leave_iname in loops_with_lbarriers: - _leave_loop_blex_processing(LOCAL, leave_iname) - - # update max blex dims - 
n_lblex_dims = max(n_lblex_dims, len(next_lblex_pt)) - iname_to_lblex_dim[leave_iname] = len(next_lblex_pt)-2 - - # update next blex pt - pre_end_loop_lblex_pt = next_lblex_pt[:] - next_lblex_pt.pop() - next_lblex_pt.pop() - next_lblex_pt[-1] += 1 - - # store tuples that will be used to create pairs - # that will later be subtracted from happens-before map - ubound = iname_bounds_pwaff[leave_iname][1] - last_iter_lblex_pt = pre_end_loop_lblex_pt[:] - last_iter_lblex_pt[-2] = ubound - lblex_exclusion_info[leave_iname][BOTTOM] = tuple( - pre_end_loop_lblex_pt) - lblex_exclusion_info[leave_iname][LAST] = tuple(last_iter_lblex_pt) - lblex_exclusion_info[leave_iname][POST] = tuple(next_lblex_pt) - # (make sure ^these are copies) - lblex_map_params |= set(ubound.get_var_names(dt.param)) - if leave_iname in loops_with_gbarriers: - _leave_loop_blex_processing(GLOBAL, leave_iname) - - elif isinstance(lin_item, RunInstruction): - # Add item to stmt_inst_to_lblex - lp_insn_id = lin_item.insn_id - stmt_inst_to_blex[LOCAL][lp_insn_id] = tuple(next_blex_pt[LOCAL]) - stmt_inst_to_blex[GLOBAL][lp_insn_id] = tuple(next_blex_pt[GLOBAL]) - - stmt_inst_to_lblex[lp_insn_id] = tuple(next_lblex_pt) - - # Don't increment blex dim val - - elif isinstance(lin_item, Barrier): - - next_lblex_pt[-1] += 1 - - else: - from loopy.schedule import (CallKernel, ReturnFromKernel) - # No action needed for these types of linearization item - assert isinstance( - lin_item, (CallKernel, ReturnFromKernel)) - pass - - lblex_map_params = sorted(lblex_map_params) - """ - # }}} - - stmt_inst_to_lblex, lblex_order_map, seq_lblex_dim_names = _collect_blex_ordering_info("local") - stmt_inst_to_gblex, gblex_order_map, seq_gblex_dim_names = _collect_blex_ordering_info("global") + (stmt_inst_to_lblex, + lblex_order_map, + seq_lblex_dim_names) = _collect_blex_ordering_info("local") + (stmt_inst_to_gblex, + gblex_order_map, + seq_gblex_dim_names) = _collect_blex_ordering_info("global") # }}} end blex order/map 
machinery @@ -796,11 +662,11 @@ def _get_map_for_stmt( # }}} - # {{{ Create SIOs for inter-thread cases (lid0' != lid0, etc) + # {{{ Create SIOs for intra-group case (gid0' == gid0, etc) # TODO finish separating lid stuff from gid stuff - # NOTE: use *unsimplified* lex tuples with blex map, which have already been padded + # Use *unsimplified* lex tuples with blex map, which have already been padded lblex_tuples_padded = [stmt_inst_to_lblex[insn_id] for insn_id in insn_ids] @@ -838,10 +704,9 @@ def _get_map_for_stmt( before_marker=BEFORE_MARK, ) - # }}} - # TODO don't return sched maps? + # TODO have option to return sched maps, but default to not returning them #pairwise_schedules[tuple(insn_ids)] = tuple(intra_thread_sched_maps) pairwise_schedules[tuple(insn_ids)] = ( (sio_seq, tuple(intra_thread_sched_maps), ), From 4ab33df56e52a956620c5f57066b202574b8f50b Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Wed, 24 Mar 2021 14:05:40 -0500 Subject: [PATCH 196/460] make returning schedules optional; default to just sio --- loopy/schedule/checker/__init__.py | 2 ++ loopy/schedule/checker/schedule.py | 14 +++++++++----- test/test_linearization_checker.py | 4 ++++ 3 files changed, 15 insertions(+), 5 deletions(-) diff --git a/loopy/schedule/checker/__init__.py b/loopy/schedule/checker/__init__.py index 2684950d0..dba847239 100644 --- a/loopy/schedule/checker/__init__.py +++ b/loopy/schedule/checker/__init__.py @@ -27,6 +27,7 @@ def get_schedules_for_statement_pairs( knl, linearization_items, insn_id_pairs, + return_schedules=False, ): r"""For each statement pair in a subset of all statement pairs found in a linearized kernel, determine the (relative) order in which the statement @@ -135,6 +136,7 @@ def get_schedules_for_statement_pairs( linearization_items, insn_id_pairs, loops_to_ignore=conc_loop_inames, + return_schedules=return_schedules, ) # }}} diff --git a/loopy/schedule/checker/schedule.py b/loopy/schedule/checker/schedule.py index ebf607c99..3545e1547 100644 --- 
a/loopy/schedule/checker/schedule.py +++ b/loopy/schedule/checker/schedule.py @@ -125,6 +125,7 @@ def generate_pairwise_schedules( lin_items, insn_id_pairs, loops_to_ignore=set(), + return_schedules=False, ): r"""For each statement pair in a subset of all statement pairs found in a linearized kernel, determine the (relative) order in which the statement @@ -708,10 +709,13 @@ def _get_map_for_stmt( # TODO have option to return sched maps, but default to not returning them #pairwise_schedules[tuple(insn_ids)] = tuple(intra_thread_sched_maps) - pairwise_schedules[tuple(insn_ids)] = ( - (sio_seq, tuple(intra_thread_sched_maps), ), - (sio_lconc, tuple(lconc_sched_maps), ), - (sio_gconc, tuple(gconc_sched_maps), ), - ) + if return_schedules: + pairwise_schedules[tuple(insn_ids)] = ( + (sio_seq, tuple(intra_thread_sched_maps), ), + (sio_lconc, tuple(lconc_sched_maps), ), + (sio_gconc, tuple(gconc_sched_maps), ), + ) + else: + pairwise_schedules[tuple(insn_ids)] = (sio_seq, sio_lconc, sio_gconc) return pairwise_schedules diff --git a/test/test_linearization_checker.py b/test/test_linearization_checker.py index c0a3e8f95..3ba4d5517 100644 --- a/test/test_linearization_checker.py +++ b/test/test_linearization_checker.py @@ -133,6 +133,7 @@ def test_pairwise_schedule_creation(): lin_knl, linearization_items, insn_id_pairs, + return_schedules=True, ) # Relationship between insn_a and insn_b --------------------------------------- @@ -394,6 +395,7 @@ def test_pairwise_schedule_creation_with_hw_par_tags(): lin_knl, linearization_items, stmt_id_pairs, + return_schedules=True, ) # Relationship between stmt_a and stmt_b --------------------------------------- @@ -578,6 +580,7 @@ def test_statement_instance_ordering(): knl, linearization_items, stmt_id_pairs, + return_schedules=True, ) # Relationship between stmt_a and stmt_b --------------------------------------- @@ -715,6 +718,7 @@ def test_statement_instance_ordering_with_hw_par_tags(): lin_knl, linearization_items, 
stmt_id_pairs, + return_schedules=True, ) # Create string for representing parallel iname condition in sio From 80fa247c2066d0985639679b4b09e6037d291395 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Wed, 24 Mar 2021 14:44:22 -0500 Subject: [PATCH 197/460] (WIP) initial first test for schedules w/barriers --- test/test_linearization_checker.py | 120 ++++++++++++++++++++++++++--- 1 file changed, 110 insertions(+), 10 deletions(-) diff --git a/test/test_linearization_checker.py b/test/test_linearization_checker.py index 3ba4d5517..776558d2c 100644 --- a/test/test_linearization_checker.py +++ b/test/test_linearization_checker.py @@ -37,6 +37,7 @@ ) from loopy.schedule.checker.schedule import ( LEX_VAR_PREFIX, + BLEX_VAR_PREFIX, STATEMENT_VAR_NAME, LTAG_VAR_NAMES, GTAG_VAR_NAMES, @@ -58,13 +59,14 @@ def _align_and_compare_maps(maps1, maps2): assert map1_aligned == map2 -def _lex_point_string(dim_vals, lid_inames=[], gid_inames=[]): +def _lex_point_string(dim_vals, lid_inames=[], gid_inames=[], prefix=LEX_VAR_PREFIX): # Return a string describing a point in a lex space # by assigning values to lex dimension variables # (used to create maps below) + # TODO make lid/gid condition optional return ", ".join( - ["%s%d=%s" % (LEX_VAR_PREFIX, idx, str(val)) + ["%s%d=%s" % (prefix, idx, str(val)) for idx, val in enumerate(dim_vals)] + ["%s=%s" % (LTAG_VAR_NAMES[idx], iname) for idx, iname in enumerate(lid_inames)] + @@ -435,6 +437,101 @@ def test_pairwise_schedule_creation_with_hw_par_tags(): # ------------------------------------------------------------------------------ + +def test_pairwise_schedule_creation_with_lbarriers(): + import islpy as isl + from loopy.schedule.checker import ( + get_schedules_for_statement_pairs, + ) + from loopy.schedule.checker.utils import ( + append_marker_to_isl_map_var_names, + ) + dt = isl.dim_type + + knl = lp.make_kernel( + [ + "{[i,j]: 0<=i,jtemp0 = 0 {id=0} + ... 
lbarrier {id=b0,dep=0} + <>temp1 = 1 {id=1,dep=b0} + for i + <>tempi0 = 0 {id=i0,dep=1} + ... lbarrier {id=ib0,dep=i0} + <>tempi1 = 0 {id=i1,dep=ib0} + <>tempi2 = 0 {id=i2,dep=i1} + for j + <>tempj0 = 0 {id=j0,dep=i2} + ... lbarrier {id=jb0,dep=j0} + <>tempj1 = 0 {id=j1,dep=jb0} + end + end + <>temp2 = 0 {id=2,dep=i0} + """, + name="funky", + assumptions="p >= 1", + lang_version=(2018, 2) + ) + + # Get a linearization + proc_knl = preprocess_kernel(knl) + lin_knl = get_one_linearized_kernel(proc_knl) + linearization_items = lin_knl.linearization + + insn_id_pairs = [("j1", "2")] + scheds = get_schedules_for_statement_pairs( + lin_knl, linearization_items, insn_id_pairs, return_schedules=True) + + # Get two maps + ( + sio_seq, (sched_map_before, sched_map_after) + ), ( + sio_lconc, (lconc_sched_before, lconc_sched_after) + ), ( + sio_gconc, (gconc_sched_before, gconc_sched_after) + ) = scheds[insn_id_pairs[0]] + + # Create expected maps and compare + + lconc_sched_before_exp = isl.Map( + "[p] -> {[%s=0,i,j] -> [%s] : 0 <= i,j < p}" + % ( + STATEMENT_VAR_NAME, + _lex_point_string(["2", "i", "2", "j", "1"], prefix=BLEX_VAR_PREFIX), + ) + ) + + lconc_sched_after_exp = isl.Map( + "[ ] -> {[%s=1] -> [%s]}" + % ( + STATEMENT_VAR_NAME, + _lex_point_string(["3", "0", "0", "0", "0"], prefix=BLEX_VAR_PREFIX), + ) + ) + + _align_and_compare_maps( + [lconc_sched_before_exp, lconc_sched_after_exp], + [lconc_sched_before, lconc_sched_after], + ) + + hab_test_pair = isl.Map( + "[p] -> {" + "[stmt' = 0, i'=1, j'=p-1] -> [stmt = 1] : p > 2" + "}") + hab_test_pair = append_marker_to_isl_map_var_names( + hab_test_pair, dt.in_, "'") + + #blex_pts_for_test_pair = isl.Map( + # "[p] -> {" + # "[blex0' = 2, blex1' = 1, blex2' = 2, blex3' = p - 1, blex4' = 1] -> " + # "[blex0 = 3, blex1 = 0, blex2 = 0, blex3 = 0, blex4 = 0]" + # "}") + #blex_pts_for_test_pair = append_marker_to_isl_map_var_names( + # blex_pts_for_test_pair, dt.in_, "'") + + assert hab_test_pair.is_subset(sio_lconc) + # }}} 
@@ -448,12 +545,13 @@ def test_lex_order_map_creation(): from loopy.schedule.checker.utils import ( append_marker_to_isl_map_var_names, ) + dt = isl.dim_type def _check_lex_map(exp_lex_order_map, n_dims): # Isl ignores the apostrophes, so explicitly add them exp_lex_order_map = append_marker_to_isl_map_var_names( - exp_lex_order_map, isl.dim_type.in_, "'") + exp_lex_order_map, dt.in_, "'") lex_order_map = create_lex_order_map( n_dims=n_dims, @@ -529,6 +627,7 @@ def test_statement_instance_ordering(): from loopy.schedule.checker.utils import ( append_marker_to_isl_map_var_names, ) + dt = isl.dim_type # Example kernel (add deps to fix loop order) knl = lp.make_kernel( @@ -593,7 +692,7 @@ def test_statement_instance_ordering(): ) # isl ignores these apostrophes, so explicitly add them exp_sio_seq = append_marker_to_isl_map_var_names( - exp_sio_seq, isl.dim_type.in_, "'") + exp_sio_seq, dt.in_, "'") _check_sio_for_stmt_pair(exp_sio_seq, "stmt_a", "stmt_b", scheds) @@ -607,7 +706,7 @@ def test_statement_instance_ordering(): ) # isl ignores these apostrophes, so explicitly add them exp_sio_seq = append_marker_to_isl_map_var_names( - exp_sio_seq, isl.dim_type.in_, "'") + exp_sio_seq, dt.in_, "'") _check_sio_for_stmt_pair(exp_sio_seq, "stmt_a", "stmt_c", scheds) @@ -621,7 +720,7 @@ def test_statement_instance_ordering(): ) # isl ignores these apostrophes, so explicitly add them exp_sio_seq = append_marker_to_isl_map_var_names( - exp_sio_seq, isl.dim_type.in_, "'") + exp_sio_seq, dt.in_, "'") _check_sio_for_stmt_pair(exp_sio_seq, "stmt_a", "stmt_d", scheds) @@ -637,7 +736,7 @@ def test_statement_instance_ordering(): ) # isl ignores these apostrophes, so explicitly add them exp_sio_seq = append_marker_to_isl_map_var_names( - exp_sio_seq, isl.dim_type.in_, "'") + exp_sio_seq, dt.in_, "'") _check_sio_for_stmt_pair(exp_sio_seq, "stmt_b", "stmt_c", scheds) @@ -651,7 +750,7 @@ def test_statement_instance_ordering(): ) # isl ignores these apostrophes, so explicitly add them 
exp_sio_seq = append_marker_to_isl_map_var_names( - exp_sio_seq, isl.dim_type.in_, "'") + exp_sio_seq, dt.in_, "'") _check_sio_for_stmt_pair(exp_sio_seq, "stmt_b", "stmt_d", scheds) @@ -665,7 +764,7 @@ def test_statement_instance_ordering(): ) # isl ignores these apostrophes, so explicitly add them exp_sio_seq = append_marker_to_isl_map_var_names( - exp_sio_seq, isl.dim_type.in_, "'") + exp_sio_seq, dt.in_, "'") _check_sio_for_stmt_pair(exp_sio_seq, "stmt_c", "stmt_d", scheds) @@ -679,6 +778,7 @@ def test_statement_instance_ordering_with_hw_par_tags(): append_marker_to_isl_map_var_names, partition_inames_by_concurrency, ) + dt = isl.dim_type # Example kernel knl = lp.make_kernel( @@ -740,7 +840,7 @@ def test_statement_instance_ordering_with_hw_par_tags(): ) # isl ignores these apostrophes, so explicitly add them exp_sio_seq = append_marker_to_isl_map_var_names( - exp_sio_seq, isl.dim_type.in_, "'") + exp_sio_seq, dt.in_, "'") _check_sio_for_stmt_pair(exp_sio_seq, "stmt_a", "stmt_b", scheds) From 528303ca28703db7e6994cbe8abdfee3618da603 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Wed, 24 Mar 2021 14:44:45 -0500 Subject: [PATCH 198/460] use isl.Map.get_var_dict() to compare all dim names more concisely --- loopy/schedule/checker/utils.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/loopy/schedule/checker/utils.py b/loopy/schedule/checker/utils.py index c079e0a61..fb8674d9a 100644 --- a/loopy/schedule/checker/utils.py +++ b/loopy/schedule/checker/utils.py @@ -86,9 +86,7 @@ def reorder_dims_by_name( def ensure_dim_names_match_and_align(obj_map, tgt_map): # first make sure names match - assert all( - set(obj_map.get_var_names(dt)) == set(tgt_map.get_var_names(dt)) - for dt in [isl.dim_type.in_, isl.dim_type.out, isl.dim_type.param]) + assert obj_map.get_var_dict() == tgt_map.get_var_dict() return isl.align_spaces(obj_map, tgt_map) From 8287ef46f9ef10a980442a98ef1637c47284da88 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Wed, 24 Mar 2021 
18:05:52 -0500 Subject: [PATCH 199/460] undo previous (broken) change: don't force all dims to be in the same place before aligning dims. duh. --- loopy/schedule/checker/utils.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/loopy/schedule/checker/utils.py b/loopy/schedule/checker/utils.py index fb8674d9a..c079e0a61 100644 --- a/loopy/schedule/checker/utils.py +++ b/loopy/schedule/checker/utils.py @@ -86,7 +86,9 @@ def reorder_dims_by_name( def ensure_dim_names_match_and_align(obj_map, tgt_map): # first make sure names match - assert obj_map.get_var_dict() == tgt_map.get_var_dict() + assert all( + set(obj_map.get_var_names(dt)) == set(tgt_map.get_var_names(dt)) + for dt in [isl.dim_type.in_, isl.dim_type.out, isl.dim_type.param]) return isl.align_spaces(obj_map, tgt_map) From 7b7169cf545314a5ec8f9a59568ff6bba5d5c9c8 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Wed, 24 Mar 2021 18:07:17 -0500 Subject: [PATCH 200/460] distinguish between the number of blex dims with and without parallel lid/gid dims --- loopy/schedule/checker/schedule.py | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/loopy/schedule/checker/schedule.py b/loopy/schedule/checker/schedule.py index 3545e1547..5392578f6 100644 --- a/loopy/schedule/checker/schedule.py +++ b/loopy/schedule/checker/schedule.py @@ -327,7 +327,7 @@ def _collect_blex_ordering_info(sync_kind): iname_to_blex_dim = {} # map from inames to corresponding blex space dim blex_exclusion_info = {} # info for creating maps to exclude from blex order blex_map_params = set() # params needed in blex map - n_blex_dims = 1 # number of dims in blex space + n_seq_blex_dims = 1 # num dims representing sequential order in blex space next_blex_pt = [0] # next tuple of points in blex order for lin_item in lin_items: @@ -357,7 +357,7 @@ def _collect_blex_ordering_info(sync_kind): if leave_iname in loops_with_barriers[sync_kind]: # update max blex dims - n_blex_dims = max(n_blex_dims, 
len(next_blex_pt)) + n_seq_blex_dims = max(n_seq_blex_dims, len(next_blex_pt)) iname_to_blex_dim[leave_iname] = len(next_blex_pt)-2 # update next blex pt @@ -400,13 +400,13 @@ def _collect_blex_ordering_info(sync_kind): # the missing dims are the fastest-updating dims, and their values should # be zero. Add them. for stmt, tup in stmt_inst_to_blex.items(): - stmt_inst_to_blex[stmt] = _pad_tuple_with_zeros(tup, n_blex_dims) + stmt_inst_to_blex[stmt] = _pad_tuple_with_zeros(tup, n_seq_blex_dims) # }}} # Create names for the blex dimensions for sequential loops seq_blex_dim_names = [ - BLEX_VAR_PREFIX+str(i) for i in range(n_blex_dims)] + BLEX_VAR_PREFIX+str(i) for i in range(n_seq_blex_dims)] seq_blex_dim_names_prime = append_marker_to_strings( seq_blex_dim_names, marker=BEFORE_MARK) @@ -437,6 +437,7 @@ def _collect_blex_ordering_info(sync_kind): blex_order_map, dt.param, blex_map_params) # Get a set representing blex_order_map space + n_blex_dims = n_seq_blex_dims + len(conc_lex_dim_names) blex_set_template = isl.align_spaces( isl.Map("[ ] -> { [ ] -> [ ] }"), blex_order_map ).move_dims( @@ -456,8 +457,8 @@ def _create_blex_set_from_tuple_pair(before, after, wrap_cond=False): # (assume strings are the inames) before_prime = tuple( v+BEFORE_MARK if isinstance(v, str) else v for v in before) - before_padded = _pad_tuple_with_zeros(before_prime, n_blex_dims) - after_padded = _pad_tuple_with_zeros(after, n_blex_dims) + before_padded = _pad_tuple_with_zeros(before_prime, n_seq_blex_dims) + after_padded = _pad_tuple_with_zeros(after, n_seq_blex_dims) # assign vals to dims for dim_name, dim_val in zip( @@ -555,6 +556,8 @@ def _get_map_for_stmt( # Get inames domain for statement instance (a BasicSet) dom = knl.get_inames_domain( knl.id_to_insn[insn_id].within_inames) + # (note that this domain may include inames that are + # not in stmt.within_inames) # Create map space (an isl space in current implementation) # {('statement', ) -> @@ -582,6 +585,12 @@ def 
_get_map_for_stmt( lex_points )] + # Note that lex_points may have fewer dims than the out-dim of sched_space + # if sched_space includes concurrent lid/gid dims. This is okay because + # the following symbolic map creation step, when assigning dim values, + # zips the space dims with the lex tuple, and any leftover lid/gid dims + # will not be assigned a value yet, which is what we want. + # Create map sched_map = create_symbolic_map_from_tuples( tuple_pairs_with_domains=zip(tuple_pair, [dom_to_intersect, ]), From 295f644d5b4df48f9ce39f968879e7129849cbdf Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Wed, 24 Mar 2021 18:42:27 -0500 Subject: [PATCH 201/460] make a sched/sio test for case with local barriers --- test/test_linearization_checker.py | 269 ++++++++++++++++------------- 1 file changed, 145 insertions(+), 124 deletions(-) diff --git a/test/test_linearization_checker.py b/test/test_linearization_checker.py index 776558d2c..ee15fc3db 100644 --- a/test/test_linearization_checker.py +++ b/test/test_linearization_checker.py @@ -42,6 +42,9 @@ LTAG_VAR_NAMES, GTAG_VAR_NAMES, ) +from loopy.schedule.checker.utils import ( + ensure_dim_names_match_and_align, +) logger = logging.getLogger(__name__) @@ -49,9 +52,6 @@ # {{{ helper functions for map creation/handling def _align_and_compare_maps(maps1, maps2): - from loopy.schedule.checker.utils import ( - ensure_dim_names_match_and_align, - ) for map1, map2 in zip(maps1, maps2): # Align maps and compare @@ -63,7 +63,6 @@ def _lex_point_string(dim_vals, lid_inames=[], gid_inames=[], prefix=LEX_VAR_PRE # Return a string describing a point in a lex space # by assigning values to lex dimension variables # (used to create maps below) - # TODO make lid/gid condition optional return ", ".join( ["%s%d=%s" % (prefix, idx, str(val)) @@ -418,7 +417,10 @@ def test_pairwise_schedule_creation_with_hw_par_tags(): "[pi,pj] -> {[%s=0,i,ii,j,jj] -> [%s] : 0 <= i,ii < pi and 0 <= j,jj < pj}" % ( STATEMENT_VAR_NAME, - 
_lex_point_string(["ii", "0"], lid_inames=["jj", "j"], gid_inames=["i"]), + _lex_point_string( + ["ii", "0"], + lid_inames=["jj", "j"], gid_inames=["i"], + ), ) ) @@ -426,7 +428,10 @@ def test_pairwise_schedule_creation_with_hw_par_tags(): "[pi,pj] -> {[%s=1,i,ii,j,jj] -> [%s] : 0 <= i,ii < pi and 0 <= j,jj < pj}" % ( STATEMENT_VAR_NAME, - _lex_point_string(["ii", "1"], lid_inames=["jj", "j"], gid_inames=["i"]), + _lex_point_string( + ["ii", "1"], + lid_inames=["jj", "j"], gid_inames=["i"], + ), ) ) @@ -437,101 +442,6 @@ def test_pairwise_schedule_creation_with_hw_par_tags(): # ------------------------------------------------------------------------------ - -def test_pairwise_schedule_creation_with_lbarriers(): - import islpy as isl - from loopy.schedule.checker import ( - get_schedules_for_statement_pairs, - ) - from loopy.schedule.checker.utils import ( - append_marker_to_isl_map_var_names, - ) - dt = isl.dim_type - - knl = lp.make_kernel( - [ - "{[i,j]: 0<=i,jtemp0 = 0 {id=0} - ... lbarrier {id=b0,dep=0} - <>temp1 = 1 {id=1,dep=b0} - for i - <>tempi0 = 0 {id=i0,dep=1} - ... lbarrier {id=ib0,dep=i0} - <>tempi1 = 0 {id=i1,dep=ib0} - <>tempi2 = 0 {id=i2,dep=i1} - for j - <>tempj0 = 0 {id=j0,dep=i2} - ... 
lbarrier {id=jb0,dep=j0} - <>tempj1 = 0 {id=j1,dep=jb0} - end - end - <>temp2 = 0 {id=2,dep=i0} - """, - name="funky", - assumptions="p >= 1", - lang_version=(2018, 2) - ) - - # Get a linearization - proc_knl = preprocess_kernel(knl) - lin_knl = get_one_linearized_kernel(proc_knl) - linearization_items = lin_knl.linearization - - insn_id_pairs = [("j1", "2")] - scheds = get_schedules_for_statement_pairs( - lin_knl, linearization_items, insn_id_pairs, return_schedules=True) - - # Get two maps - ( - sio_seq, (sched_map_before, sched_map_after) - ), ( - sio_lconc, (lconc_sched_before, lconc_sched_after) - ), ( - sio_gconc, (gconc_sched_before, gconc_sched_after) - ) = scheds[insn_id_pairs[0]] - - # Create expected maps and compare - - lconc_sched_before_exp = isl.Map( - "[p] -> {[%s=0,i,j] -> [%s] : 0 <= i,j < p}" - % ( - STATEMENT_VAR_NAME, - _lex_point_string(["2", "i", "2", "j", "1"], prefix=BLEX_VAR_PREFIX), - ) - ) - - lconc_sched_after_exp = isl.Map( - "[ ] -> {[%s=1] -> [%s]}" - % ( - STATEMENT_VAR_NAME, - _lex_point_string(["3", "0", "0", "0", "0"], prefix=BLEX_VAR_PREFIX), - ) - ) - - _align_and_compare_maps( - [lconc_sched_before_exp, lconc_sched_after_exp], - [lconc_sched_before, lconc_sched_after], - ) - - hab_test_pair = isl.Map( - "[p] -> {" - "[stmt' = 0, i'=1, j'=p-1] -> [stmt = 1] : p > 2" - "}") - hab_test_pair = append_marker_to_isl_map_var_names( - hab_test_pair, dt.in_, "'") - - #blex_pts_for_test_pair = isl.Map( - # "[p] -> {" - # "[blex0' = 2, blex1' = 1, blex2' = 2, blex3' = p - 1, blex4' = 1] -> " - # "[blex0 = 3, blex1 = 0, blex2 = 0, blex3 = 0, blex4 = 0]" - # "}") - #blex_pts_for_test_pair = append_marker_to_isl_map_var_names( - # blex_pts_for_test_pair, dt.in_, "'") - - assert hab_test_pair.is_subset(sio_lconc) - # }}} @@ -846,31 +756,142 @@ def test_statement_instance_ordering_with_hw_par_tags(): # ------------------------------------------------------------------------------ +# }}} -# TODO when testing happens-after-barrier map, make 
sure to test parameter assumption issues: -""" ->>> test_pair2 = append_marker_to_isl_map_var_names(isl.Map("[p] -> { [stmt' = 0, i'=1, j'=p-1] -> [stmt = 1] : p > 1 }"), isl.dim_type.in_, "'") ->>> test_pair3 = append_marker_to_isl_map_var_names(isl.Map("[p] -> { [stmt' = 0, i'=1, j'=p-1] -> [stmt = 1] : p > 2 }"), isl.dim_type.in_, "'") ->>> hab = append_marker_to_isl_map_var_names(isl.Map("[p] -> { [stmt' = 0, i', j'] -> [stmt = 1] : 0 <= i' < p and 0 <= j' <= -2 + p; [stmt' = 0, i', j' = -1 + p] -> [stmt = 1] : 0 <= i' <= -2 + p }"), isl.dim_type.in_, "'") ->>> print(prettier_map_string(hab)) -[p] -> { -[stmt' = 0, i', j'] -> [stmt = 1] : 0 <= i' < p and 0 <= j' <= -2 + p; -[stmt' = 0, i', j' = -1 + p] -> [stmt = 1] : 0 <= i' <= -2 + p -} ->>> print(prettier_map_string(test_pair2)) -[p] -> { -[stmt' = 0, i' = 1, j' = -1 + p] -> [stmt = 1] : p >= 2 -} ->>> print(prettier_map_string(test_pair3)) -[p] -> { -[stmt' = 0, i' = 1, j' = -1 + p] -> [stmt = 1] : p >= 3 -} ->>> test_pair2.is_subset(hab) -False ->>> test_pair3.is_subset(hab) -True -""" +# {{{ SIOs and schedules with barriers + +def test_sios_and_schedules_with_lbarriers(): + import islpy as isl + from loopy.schedule.checker import ( + get_schedules_for_statement_pairs, + ) + from loopy.schedule.checker.utils import ( + append_marker_to_isl_map_var_names, + ) + dt = isl.dim_type + + knl = lp.make_kernel( + [ + #"{[i,j,l0,l1,g0]: 0<=i,j,l0,l1,g0temp0 = 0 {id=0} + ... lbarrier {id=b0,dep=0} + <>temp1 = 1 {id=1,dep=b0} + for i + <>tempi0 = 0 {id=i0,dep=1} + ... lbarrier {id=ib0,dep=i0} + <>tempi1 = 0 {id=i1,dep=ib0} + <>tempi2 = 0 {id=i2,dep=i1} + for j + <>tempj0 = 0 {id=j0,dep=i2} + ... 
lbarrier {id=jb0,dep=j0} + <>tempj1 = 0 {id=j1,dep=jb0} + end + end + <>temp2 = 0 {id=2,dep=i0} + end + end + end + """, + name="funky", + assumptions="p1,p2 >= 1", + lang_version=(2018, 2) + ) + knl = lp.tag_inames(knl, {"l0": "l.0", "l1": "l.1", "g0": "g.0"}) + + # Get a linearization + proc_knl = preprocess_kernel(knl) + lin_knl = get_one_linearized_kernel(proc_knl) + linearization_items = lin_knl.linearization + + insn_id_pairs = [("j1", "2")] + scheds = get_schedules_for_statement_pairs( + lin_knl, linearization_items, insn_id_pairs, return_schedules=True) + + # Get two maps + ( + sio_seq, (sched_map_before, sched_map_after) + ), ( + sio_lconc, (lconc_sched_before, lconc_sched_after) + ), ( + sio_gconc, (gconc_sched_before, gconc_sched_after) + ) = scheds[insn_id_pairs[0]] + + # Create expected maps and compare + + lconc_sched_before_exp = isl.Map( + "[p1,p2] -> {[%s=0,i,j,l0,l1,g0] -> [%s] : 0<=i,j {[%s=1,l0,l1,g0] -> [%s] : 0<=l0,l1,g0 {{" + "[{0}' = 0, i', j'=p1-1, g0', l0', l1'] -> [{0} = 1, l0, l1, g0] : " + "0 <= i' <= {1} and " # constrain i + "p1 >= {2} and " # constrain p + "0<=l0',l1',g0',l0,l1,g0 {{" + "[{0}' = 0, i', j'=p1-1, g0', l0', l1'] -> [{0} = 1, l0, l1, g0] : " + "0 <= i' <= {1} and " # constrain i + "p1 >= {2} and " # constrain p + "0<=l0',l1',g0',l0,l1,g0 Date: Thu, 25 Mar 2021 13:33:28 -0500 Subject: [PATCH 202/460] in ensure_dim_names_match_and_align(), raise informative error when map names don't match instead of just failing on assert --- loopy/schedule/checker/utils.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/loopy/schedule/checker/utils.py b/loopy/schedule/checker/utils.py index c079e0a61..4ae2fbf64 100644 --- a/loopy/schedule/checker/utils.py +++ b/loopy/schedule/checker/utils.py @@ -86,9 +86,14 @@ def reorder_dims_by_name( def ensure_dim_names_match_and_align(obj_map, tgt_map): # first make sure names match - assert all( - set(obj_map.get_var_names(dt)) == set(tgt_map.get_var_names(dt)) - for 
dt in [isl.dim_type.in_, isl.dim_type.out, isl.dim_type.param]) + if not all( + set(obj_map.get_var_names(dt)) == set(tgt_map.get_var_names(dt)) + for dt in + [isl.dim_type.in_, isl.dim_type.out, isl.dim_type.param]): + raise ValueError( + "Cannot align spaces; names don't match:\n%s\n%s" + % (prettier_map_string(obj_map), prettier_map_string(tgt_map)) + ) return isl.align_spaces(obj_map, tgt_map) From df5192ab726905f8f641e8b40f8e9a1a879b1e0d Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Thu, 25 Mar 2021 13:33:51 -0500 Subject: [PATCH 203/460] add todo to fix doctest --- loopy/schedule/checker/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/loopy/schedule/checker/__init__.py b/loopy/schedule/checker/__init__.py index dba847239..f2caec55f 100644 --- a/loopy/schedule/checker/__init__.py +++ b/loopy/schedule/checker/__init__.py @@ -92,7 +92,7 @@ def get_schedules_for_statement_pairs( : 0 <= i < pi and 0 <= j < pj and 0 <= k < pk } """ - # TODO update docs now that we're returning SIOs + # TODO update docs and docstring now that we're returning SIOs # {{{ make sure kernel has been preprocessed From 96d4c21636178377e6855f7a127fae88f4049a04 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Thu, 25 Mar 2021 13:35:13 -0500 Subject: [PATCH 204/460] further lbarrier sio tests; also improve testing code with better helper funcs to reduce duplicated code --- test/test_linearization_checker.py | 239 ++++++++++++++++++----------- 1 file changed, 148 insertions(+), 91 deletions(-) diff --git a/test/test_linearization_checker.py b/test/test_linearization_checker.py index ee15fc3db..fb15511ed 100644 --- a/test/test_linearization_checker.py +++ b/test/test_linearization_checker.py @@ -26,6 +26,7 @@ import sys import numpy as np import loopy as lp +import islpy as isl from pyopencl.tools import ( # noqa pytest_generate_tests_for_pyopencl as pytest_generate_tests) @@ -41,6 +42,7 @@ STATEMENT_VAR_NAME, LTAG_VAR_NAMES, GTAG_VAR_NAMES, + BEFORE_MARK, ) from 
loopy.schedule.checker.utils import ( ensure_dim_names_match_and_align, @@ -52,10 +54,15 @@ # {{{ helper functions for map creation/handling def _align_and_compare_maps(maps1, maps2): + from loopy.schedule.checker.utils import prettier_map_string for map1, map2 in zip(maps1, maps2): # Align maps and compare map1_aligned = ensure_dim_names_match_and_align(map1, map2) + if map1_aligned != map2: + print("Maps not equal:") + print(prettier_map_string(map1_aligned)) + print(prettier_map_string(map2)) assert map1_aligned == map2 @@ -73,13 +80,21 @@ def _lex_point_string(dim_vals, lid_inames=[], gid_inames=[], prefix=LEX_VAR_PRE for idx, iname in enumerate(gid_inames)] ) + +def _isl_map_with_marked_dims(s): + from loopy.schedule.checker.utils import ( + append_marker_to_isl_map_var_names, + ) + dt = isl.dim_type + # Isl ignores the apostrophes in map strings, until they are explicitly added + return append_marker_to_isl_map_var_names(isl.Map(s), dt.in_, BEFORE_MARK) + # }}} # {{{ test pairwise schedule creation def test_pairwise_schedule_creation(): - import islpy as isl from loopy.schedule.checker import ( get_schedules_for_statement_pairs, ) @@ -139,7 +154,7 @@ def test_pairwise_schedule_creation(): # Relationship between insn_a and insn_b --------------------------------------- - # Get two maps + # Get maps (include schedules, just for test purposes) ( sio_seq, (sched_before, sched_after) ), ( @@ -175,7 +190,7 @@ def test_pairwise_schedule_creation(): # ------------------------------------------------------------------------------ # Relationship between insn_a and insn_c --------------------------------------- - # Get two maps + # Get maps (include schedules, just for test purposes) ( sio_seq, (sched_before, sched_after) ), ( @@ -211,7 +226,7 @@ def test_pairwise_schedule_creation(): # ------------------------------------------------------------------------------ # Relationship between insn_a and insn_d --------------------------------------- - # Get two maps + # Get 
maps (include schedules, just for test purposes) ( sio_seq, (sched_before, sched_after) ), ( @@ -247,7 +262,7 @@ def test_pairwise_schedule_creation(): # ------------------------------------------------------------------------------ # Relationship between insn_b and insn_c --------------------------------------- - # Get two maps + # Get maps (include schedules, just for test purposes) ( sio_seq, (sched_before, sched_after) ), ( @@ -283,7 +298,7 @@ def test_pairwise_schedule_creation(): # ------------------------------------------------------------------------------ # Relationship between insn_b and insn_d --------------------------------------- - # Get two maps + # Get maps (include schedules, just for test purposes) ( sio_seq, (sched_before, sched_after) ), ( @@ -319,7 +334,7 @@ def test_pairwise_schedule_creation(): # ------------------------------------------------------------------------------ # Relationship between insn_c and insn_d --------------------------------------- - # Get two maps + # Get maps (include schedules, just for test purposes) ( sio_seq, (sched_before, sched_after) ), ( @@ -354,7 +369,6 @@ def test_pairwise_schedule_creation(): def test_pairwise_schedule_creation_with_hw_par_tags(): - import islpy as isl from loopy.schedule.checker import ( get_schedules_for_statement_pairs, ) @@ -401,7 +415,7 @@ def test_pairwise_schedule_creation_with_hw_par_tags(): # Relationship between stmt_a and stmt_b --------------------------------------- - # Get two maps + # Get maps (include schedules, just for test purposes) ( sio_seq, (sched_before, sched_after) ), ( @@ -448,21 +462,12 @@ def test_pairwise_schedule_creation_with_hw_par_tags(): # {{{ test lex order map creation def test_lex_order_map_creation(): - import islpy as isl from loopy.schedule.checker.lexicographic_order_map import ( create_lex_order_map, ) - from loopy.schedule.checker.utils import ( - append_marker_to_isl_map_var_names, - ) - dt = isl.dim_type def _check_lex_map(exp_lex_order_map, 
n_dims): - # Isl ignores the apostrophes, so explicitly add them - exp_lex_order_map = append_marker_to_isl_map_var_names( - exp_lex_order_map, dt.in_, "'") - lex_order_map = create_lex_order_map( n_dims=n_dims, dim_names=["%s%d" % (LEX_VAR_PREFIX, i) for i in range(n_dims)], @@ -471,7 +476,7 @@ def _check_lex_map(exp_lex_order_map, n_dims): assert lex_order_map == exp_lex_order_map assert lex_order_map.get_var_dict() == exp_lex_order_map.get_var_dict() - exp_lex_order_map = isl.Map( + exp_lex_order_map = _isl_map_with_marked_dims( "{{ " "[{0}0', {0}1', {0}2', {0}3', {0}4'] -> [{0}0, {0}1, {0}2, {0}3, {0}4] :" "(" @@ -489,7 +494,7 @@ def _check_lex_map(exp_lex_order_map, n_dims): _check_lex_map(exp_lex_order_map, 5) - exp_lex_order_map = isl.Map( + exp_lex_order_map = _isl_map_with_marked_dims( "{{ " "[{0}0'] -> [{0}0] :" "(" @@ -505,39 +510,49 @@ def _check_lex_map(exp_lex_order_map, n_dims): # {{{ test statement instance ordering creation def _check_sio_for_stmt_pair( - exp_sio, stmt_id_before, stmt_id_after, - scheds, + sio_dict, + exp_sio_seq=None, + exp_sched_before_seq=None, + exp_sched_after_seq=None, + exp_sio_lconc=None, + exp_sched_before_lconc=None, + exp_sched_after_lconc=None, + exp_sio_gconc=None, + exp_sched_before_gconc=None, + exp_sched_after_gconc=None, ): from loopy.schedule.checker.utils import ( ensure_dim_names_match_and_align, ) - # Get pairwise schedule + # Check whether scheds were included + #try: ( sio_seq, (sched_before, sched_after) ), ( - sio_lconc, (lconc_sched_before, lconc_sched_after) + sio_lconc, (sched_before_lconc, sched_after_lconc) ), ( - sio_gconc, (gconc_sched_before, gconc_sched_after) - ) = scheds[ + sio_gconc, (sched_before_gconc, sched_after_gconc) + ) = sio_dict[ (stmt_id_before, stmt_id_after)] + #except : + # sio_seq, sio_lconc, sio_gconc = sio_dict[ + # (stmt_id_before, stmt_id_after)] - sio_seq_aligned = ensure_dim_names_match_and_align(sio_seq, exp_sio) + # TODO left off here, check all passed maps, + # en 
eliminate _align_and_comp... - assert sio_seq_aligned == exp_sio + sio_seq_aligned = ensure_dim_names_match_and_align(exp_sio_seq, sio_seq) + + assert sio_seq_aligned == exp_sio_seq def test_statement_instance_ordering(): - import islpy as isl from loopy.schedule.checker import ( get_schedules_for_statement_pairs, ) - from loopy.schedule.checker.utils import ( - append_marker_to_isl_map_var_names, - ) - dt = isl.dim_type # Example kernel (add deps to fix loop order) knl = lp.make_kernel( @@ -594,49 +609,40 @@ def test_statement_instance_ordering(): # Relationship between stmt_a and stmt_b --------------------------------------- - exp_sio_seq = isl.Map( + exp_sio_seq = _isl_map_with_marked_dims( "[pi, pj, pk] -> {{ " "[{0}'=0, i', k'] -> [{0}=1, i, j] : " "0 <= i,i' < pi and 0 <= k' < pk and 0 <= j < pj and i >= i' " "}}".format(STATEMENT_VAR_NAME) ) - # isl ignores these apostrophes, so explicitly add them - exp_sio_seq = append_marker_to_isl_map_var_names( - exp_sio_seq, dt.in_, "'") - _check_sio_for_stmt_pair(exp_sio_seq, "stmt_a", "stmt_b", scheds) + _check_sio_for_stmt_pair("stmt_a", "stmt_b", scheds, exp_sio_seq=exp_sio_seq) # Relationship between stmt_a and stmt_c --------------------------------------- - exp_sio_seq = isl.Map( + exp_sio_seq = _isl_map_with_marked_dims( "[pi, pj, pk] -> {{ " "[{0}'=0, i', k'] -> [{0}=1, i, j] : " "0 <= i,i' < pi and 0 <= k' < pk and 0 <= j < pj and i >= i' " "}}".format(STATEMENT_VAR_NAME) ) - # isl ignores these apostrophes, so explicitly add them - exp_sio_seq = append_marker_to_isl_map_var_names( - exp_sio_seq, dt.in_, "'") - _check_sio_for_stmt_pair(exp_sio_seq, "stmt_a", "stmt_c", scheds) + _check_sio_for_stmt_pair("stmt_a", "stmt_c", scheds, exp_sio_seq=exp_sio_seq) # Relationship between stmt_a and stmt_d --------------------------------------- - exp_sio_seq = isl.Map( + exp_sio_seq = _isl_map_with_marked_dims( "[pt, pi, pk] -> {{ " "[{0}'=0, i', k'] -> [{0}=1, t] : " "0 <= i' < pi and 0 <= k' < pk and 0 <= t < pt " 
"}}".format(STATEMENT_VAR_NAME) ) - # isl ignores these apostrophes, so explicitly add them - exp_sio_seq = append_marker_to_isl_map_var_names( - exp_sio_seq, dt.in_, "'") - _check_sio_for_stmt_pair(exp_sio_seq, "stmt_a", "stmt_d", scheds) + _check_sio_for_stmt_pair("stmt_a", "stmt_d", scheds, exp_sio_seq=exp_sio_seq) # Relationship between stmt_b and stmt_c --------------------------------------- - exp_sio_seq = isl.Map( + exp_sio_seq = _isl_map_with_marked_dims( "[pi, pj] -> {{ " "[{0}'=0, i', j'] -> [{0}=1, i, j] : " "0 <= i,i' < pi and 0 <= j,j' < pj and i > i'; " @@ -644,51 +650,39 @@ def test_statement_instance_ordering(): "0 <= i' < pi and 0 <= j,j' < pj and j >= j'; " "}}".format(STATEMENT_VAR_NAME) ) - # isl ignores these apostrophes, so explicitly add them - exp_sio_seq = append_marker_to_isl_map_var_names( - exp_sio_seq, dt.in_, "'") - _check_sio_for_stmt_pair(exp_sio_seq, "stmt_b", "stmt_c", scheds) + _check_sio_for_stmt_pair("stmt_b", "stmt_c", scheds, exp_sio_seq=exp_sio_seq) # Relationship between stmt_b and stmt_d --------------------------------------- - exp_sio_seq = isl.Map( + exp_sio_seq = _isl_map_with_marked_dims( "[pt, pi, pj] -> {{ " "[{0}'=0, i', j'] -> [{0}=1, t] : " "0 <= i' < pi and 0 <= j' < pj and 0 <= t < pt " "}}".format(STATEMENT_VAR_NAME) ) - # isl ignores these apostrophes, so explicitly add them - exp_sio_seq = append_marker_to_isl_map_var_names( - exp_sio_seq, dt.in_, "'") - _check_sio_for_stmt_pair(exp_sio_seq, "stmt_b", "stmt_d", scheds) + _check_sio_for_stmt_pair("stmt_b", "stmt_d", scheds, exp_sio_seq=exp_sio_seq) # Relationship between stmt_c and stmt_d --------------------------------------- - exp_sio_seq = isl.Map( + exp_sio_seq = _isl_map_with_marked_dims( "[pt, pi, pj] -> {{ " "[{0}'=0, i', j'] -> [{0}=1, t] : " "0 <= i' < pi and 0 <= j' < pj and 0 <= t < pt " "}}".format(STATEMENT_VAR_NAME) ) - # isl ignores these apostrophes, so explicitly add them - exp_sio_seq = append_marker_to_isl_map_var_names( - exp_sio_seq, 
dt.in_, "'") - _check_sio_for_stmt_pair(exp_sio_seq, "stmt_c", "stmt_d", scheds) + _check_sio_for_stmt_pair("stmt_c", "stmt_d", scheds, exp_sio_seq=exp_sio_seq) def test_statement_instance_ordering_with_hw_par_tags(): - import islpy as isl from loopy.schedule.checker import ( get_schedules_for_statement_pairs, ) from loopy.schedule.checker.utils import ( - append_marker_to_isl_map_var_names, partition_inames_by_concurrency, ) - dt = isl.dim_type # Example kernel knl = lp.make_kernel( @@ -738,7 +732,7 @@ def test_statement_instance_ordering_with_hw_par_tags(): # Relationship between stmt_a and stmt_b --------------------------------------- - exp_sio_seq = isl.Map( + exp_sio_seq = _isl_map_with_marked_dims( "[pi, pj] -> {{ " "[{0}'=0, i', ii', j', jj'] -> [{0}=1, i, ii, j, jj] : " "0 <= i,ii,i',ii' < pi and 0 <= j,jj,j',jj' < pj and ii >= ii' " @@ -748,11 +742,8 @@ def test_statement_instance_ordering_with_hw_par_tags(): par_iname_condition, ) ) - # isl ignores these apostrophes, so explicitly add them - exp_sio_seq = append_marker_to_isl_map_var_names( - exp_sio_seq, dt.in_, "'") - _check_sio_for_stmt_pair(exp_sio_seq, "stmt_a", "stmt_b", scheds) + _check_sio_for_stmt_pair("stmt_a", "stmt_b", scheds, exp_sio_seq=exp_sio_seq) # ------------------------------------------------------------------------------ @@ -762,14 +753,9 @@ def test_statement_instance_ordering_with_hw_par_tags(): # {{{ SIOs and schedules with barriers def test_sios_and_schedules_with_lbarriers(): - import islpy as isl from loopy.schedule.checker import ( get_schedules_for_statement_pairs, ) - from loopy.schedule.checker.utils import ( - append_marker_to_isl_map_var_names, - ) - dt = isl.dim_type knl = lp.make_kernel( [ @@ -811,23 +797,28 @@ def test_sios_and_schedules_with_lbarriers(): lin_knl = get_one_linearized_kernel(proc_knl) linearization_items = lin_knl.linearization - insn_id_pairs = [("j1", "2")] + insn_id_pairs = [("j1", "2"), ("1", "i0")] scheds = get_schedules_for_statement_pairs( 
lin_knl, linearization_items, insn_id_pairs, return_schedules=True) - # Get two maps + # Relationship between j1 and 2 -------------------------------------------- + + # Get maps (include schedules, just for test purposes) ( sio_seq, (sched_map_before, sched_map_after) ), ( sio_lconc, (lconc_sched_before, lconc_sched_after) ), ( sio_gconc, (gconc_sched_before, gconc_sched_after) - ) = scheds[insn_id_pairs[0]] + ) = scheds[("j1", "2")] # Create expected maps and compare + conc_iname_bound_str = "0<=l0,l1,g0 {[%s=0,i,j,l0,l1,g0] -> [%s] : 0<=i,j {[%s=0,i,j,l0,l1,g0] -> [%s] : 0<=i,j {[%s=1,l0,l1,g0] -> [%s] : 0<=l0,l1,g0 {[%s=1,l0,l1,g0] -> [%s] : %s}" % ( STATEMENT_VAR_NAME, _lex_point_string( @@ -847,32 +839,45 @@ def test_sios_and_schedules_with_lbarriers(): lid_inames=["l0", "l1"], gid_inames=["g0"], prefix=BLEX_VAR_PREFIX, ), + conc_iname_bound_str, + ) + ) + + sio_lconc_exp = _isl_map_with_marked_dims( + "[p1,p2] -> {{ " + "[{0}'=0,i',j',l0',l1',g0'] -> [{0}=1,l0,l1,g0] : " + "((0 <= i' < p1 and 0 <= j' < p1-1) or " # not last iteration of j + " (0 <= i' < p1-1 and 0 <= j' < p1))" # not last iteration of i + "and g0 = g0' " # within a single group + "and {1} and {2}" # conc iname bounds + "}}".format( + STATEMENT_VAR_NAME, + conc_iname_bound_str, + conc_iname_bound_str_p, ) ) _align_and_compare_maps( - [lconc_sched_before_exp, lconc_sched_after_exp], - [lconc_sched_before, lconc_sched_after], + [lconc_sched_before_exp, lconc_sched_after_exp, sio_lconc_exp], + [lconc_sched_before, lconc_sched_after, sio_lconc], ) - # Check for some example pairs in the sio_lconc map + # Check for some key example pairs in the sio_lconc map # As long as this is not the last iteration of the i loop, then there # should be a barrier between the last instance of statement j1 # and statement 2: p1_val = 7 last_i_val = p1_val - 1 - max_non_last_i_val = last_i_val - 1 + max_non_last_i_val = last_i_val - 1 # max i val that isn't the last iteration - wanted_pairs = isl.Map( + 
wanted_pairs = _isl_map_with_marked_dims( "[p1,p2] -> {{" "[{0}' = 0, i', j'=p1-1, g0', l0', l1'] -> [{0} = 1, l0, l1, g0] : " "0 <= i' <= {1} and " # constrain i "p1 >= {2} and " # constrain p "0<=l0',l1',g0',l0,l1,g0 {{" "[{0}' = 0, i', j'=p1-1, g0', l0', l1'] -> [{0} = 1, l0, l1, g0] : " "0 <= i' <= {1} and " # constrain i "p1 >= {2} and " # constrain p "0<=l0',l1',g0',l0,l1,g0 {[%s=0,l0,l1,g0] -> [%s] : 0<=l0,l1,g0 {[%s=1,i,j,l0,l1,g0] -> [%s] : 0<=i,j {{ " + "[{0}'=0,l0',l1',g0'] -> [{0}=1,i,j,l0,l1,g0] : " + "1 <= i < p1 and 0 <= j < p1 " # not first iteration of i + "and g0 = g0' " # within a single group + "and {1} and {2}" # conc iname bounds + "}}".format( + STATEMENT_VAR_NAME, + conc_iname_bound_str, + conc_iname_bound_str_p, + ) + ) + + _align_and_compare_maps( + [lconc_sched_before_exp, lconc_sched_after_exp, sio_lconc_exp], + [lconc_sched_before, lconc_sched_after, sio_lconc], + ) # }}} From 46e1bb198486f469fe36a53eb6d5ede6f834d6e3 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Thu, 25 Mar 2021 19:04:03 -0500 Subject: [PATCH 205/460] use new-and-improved _check_sio_for_stmt_pair() to make tests more concise --- test/test_linearization_checker.py | 307 ++++++++++++----------------- 1 file changed, 125 insertions(+), 182 deletions(-) diff --git a/test/test_linearization_checker.py b/test/test_linearization_checker.py index fb15511ed..be73e1bb2 100644 --- a/test/test_linearization_checker.py +++ b/test/test_linearization_checker.py @@ -53,10 +53,10 @@ # {{{ helper functions for map creation/handling -def _align_and_compare_maps(maps1, maps2): +def _align_and_compare_maps(maps): from loopy.schedule.checker.utils import prettier_map_string - for map1, map2 in zip(maps1, maps2): + for map1, map2 in maps: # Align maps and compare map1_aligned = ensure_dim_names_match_and_align(map1, map2) if map1_aligned != map2: @@ -149,24 +149,14 @@ def test_pairwise_schedule_creation(): lin_knl, linearization_items, insn_id_pairs, - return_schedules=True, + 
return_schedules=True, # include schedules for testing ) # Relationship between insn_a and insn_b --------------------------------------- - # Get maps (include schedules, just for test purposes) - ( - sio_seq, (sched_before, sched_after) - ), ( - sio_lconc, (lconc_sched_before, lconc_sched_after) - ), ( - sio_gconc, (gconc_sched_before, gconc_sched_after) - ) = scheds[ - ("insn_a", "insn_b")] - # Create expected maps and compare - sched_before_exp = isl.Map( + sched_before_seq_exp = isl.Map( "[pi, pk] -> { [%s=0, i, k] -> [%s] : 0 <= i < pi and 0 <= k < pk }" % ( STATEMENT_VAR_NAME, @@ -174,7 +164,7 @@ def test_pairwise_schedule_creation(): ) ) - sched_after_exp = isl.Map( + sched_after_seq_exp = isl.Map( "[pi, pj] -> { [%s=1, i, j] -> [%s] : 0 <= i < pi and 0 <= j < pj }" % ( STATEMENT_VAR_NAME, @@ -182,27 +172,18 @@ def test_pairwise_schedule_creation(): ) ) - _align_and_compare_maps( - [sched_before_exp, sched_after_exp], - [sched_before, sched_after], + _check_sio_for_stmt_pair( + "insn_a", "insn_b", scheds, + sched_before_seq_exp=sched_before_seq_exp, + sched_after_seq_exp=sched_after_seq_exp, ) # ------------------------------------------------------------------------------ # Relationship between insn_a and insn_c --------------------------------------- - # Get maps (include schedules, just for test purposes) - ( - sio_seq, (sched_before, sched_after) - ), ( - sio_lconc, (lconc_sched_before, lconc_sched_after) - ), ( - sio_gconc, (gconc_sched_before, gconc_sched_after) - ) = scheds[ - ("insn_a", "insn_c")] - # Create expected maps and compare - sched_before_exp = isl.Map( + sched_before_seq_exp = isl.Map( "[pi, pk] -> { [%s=0, i, k] -> [%s] : 0 <= i < pi and 0 <= k < pk }" % ( STATEMENT_VAR_NAME, @@ -210,7 +191,7 @@ def test_pairwise_schedule_creation(): ) ) - sched_after_exp = isl.Map( + sched_after_seq_exp = isl.Map( "[pi, pj] -> { [%s=1, i, j] -> [%s] : 0 <= i < pi and 0 <= j < pj }" % ( STATEMENT_VAR_NAME, @@ -218,27 +199,18 @@ def 
test_pairwise_schedule_creation(): ) ) - _align_and_compare_maps( - [sched_before_exp, sched_after_exp], - [sched_before, sched_after], + _check_sio_for_stmt_pair( + "insn_a", "insn_c", scheds, + sched_before_seq_exp=sched_before_seq_exp, + sched_after_seq_exp=sched_after_seq_exp, ) # ------------------------------------------------------------------------------ # Relationship between insn_a and insn_d --------------------------------------- - # Get maps (include schedules, just for test purposes) - ( - sio_seq, (sched_before, sched_after) - ), ( - sio_lconc, (lconc_sched_before, lconc_sched_after) - ), ( - sio_gconc, (gconc_sched_before, gconc_sched_after) - ) = scheds[ - ("insn_a", "insn_d")] - # Create expected maps and compare - sched_before_exp = isl.Map( + sched_before_seq_exp = isl.Map( "[pi, pk] -> { [%s=0, i, k] -> [%s] : 0 <= i < pi and 0 <= k < pk }" % ( STATEMENT_VAR_NAME, @@ -246,7 +218,7 @@ def test_pairwise_schedule_creation(): ) ) - sched_after_exp = isl.Map( + sched_after_seq_exp = isl.Map( "[pt] -> { [%s=1, t] -> [%s] : 0 <= t < pt }" % ( STATEMENT_VAR_NAME, @@ -254,27 +226,18 @@ def test_pairwise_schedule_creation(): ) ) - _align_and_compare_maps( - [sched_before_exp, sched_after_exp], - [sched_before, sched_after], + _check_sio_for_stmt_pair( + "insn_a", "insn_d", scheds, + sched_before_seq_exp=sched_before_seq_exp, + sched_after_seq_exp=sched_after_seq_exp, ) # ------------------------------------------------------------------------------ # Relationship between insn_b and insn_c --------------------------------------- - # Get maps (include schedules, just for test purposes) - ( - sio_seq, (sched_before, sched_after) - ), ( - sio_lconc, (lconc_sched_before, lconc_sched_after) - ), ( - sio_gconc, (gconc_sched_before, gconc_sched_after) - ) = scheds[ - ("insn_b", "insn_c")] - # Create expected maps and compare - sched_before_exp = isl.Map( + sched_before_seq_exp = isl.Map( "[pi, pj] -> { [%s=0, i, j] -> [%s] : 0 <= i < pi and 0 <= j < pj }" % ( 
STATEMENT_VAR_NAME, @@ -282,7 +245,7 @@ def test_pairwise_schedule_creation(): ) ) - sched_after_exp = isl.Map( + sched_after_seq_exp = isl.Map( "[pi, pj] -> { [%s=1, i, j] -> [%s] : 0 <= i < pi and 0 <= j < pj }" % ( STATEMENT_VAR_NAME, @@ -290,27 +253,18 @@ def test_pairwise_schedule_creation(): ) ) - _align_and_compare_maps( - [sched_before_exp, sched_after_exp], - [sched_before, sched_after], + _check_sio_for_stmt_pair( + "insn_b", "insn_c", scheds, + sched_before_seq_exp=sched_before_seq_exp, + sched_after_seq_exp=sched_after_seq_exp, ) # ------------------------------------------------------------------------------ # Relationship between insn_b and insn_d --------------------------------------- - # Get maps (include schedules, just for test purposes) - ( - sio_seq, (sched_before, sched_after) - ), ( - sio_lconc, (lconc_sched_before, lconc_sched_after) - ), ( - sio_gconc, (gconc_sched_before, gconc_sched_after) - ) = scheds[ - ("insn_b", "insn_d")] - # Create expected maps and compare - sched_before_exp = isl.Map( + sched_before_seq_exp = isl.Map( "[pi, pj] -> { [%s=0, i, j] -> [%s] : 0 <= i < pi and 0 <= j < pj }" % ( STATEMENT_VAR_NAME, @@ -318,7 +272,7 @@ def test_pairwise_schedule_creation(): ) ) - sched_after_exp = isl.Map( + sched_after_seq_exp = isl.Map( "[pt] -> { [%s=1, t] -> [%s] : 0 <= t < pt }" % ( STATEMENT_VAR_NAME, @@ -326,27 +280,18 @@ def test_pairwise_schedule_creation(): ) ) - _align_and_compare_maps( - [sched_before_exp, sched_after_exp], - [sched_before, sched_after], + _check_sio_for_stmt_pair( + "insn_b", "insn_d", scheds, + sched_before_seq_exp=sched_before_seq_exp, + sched_after_seq_exp=sched_after_seq_exp, ) # ------------------------------------------------------------------------------ # Relationship between insn_c and insn_d --------------------------------------- - # Get maps (include schedules, just for test purposes) - ( - sio_seq, (sched_before, sched_after) - ), ( - sio_lconc, (lconc_sched_before, lconc_sched_after) - ), ( - 
sio_gconc, (gconc_sched_before, gconc_sched_after) - ) = scheds[ - ("insn_c", "insn_d")] - # Create expected maps and compare - sched_before_exp = isl.Map( + sched_before_seq_exp = isl.Map( "[pi, pj] -> { [%s=0, i, j] -> [%s] : 0 <= i < pi and 0 <= j < pj }" % ( STATEMENT_VAR_NAME, @@ -354,7 +299,7 @@ def test_pairwise_schedule_creation(): ) ) - sched_after_exp = isl.Map( + sched_after_seq_exp = isl.Map( "[pt] -> { [%s=1, t] -> [%s] : 0 <= t < pt }" % ( STATEMENT_VAR_NAME, @@ -362,9 +307,10 @@ def test_pairwise_schedule_creation(): ) ) - _align_and_compare_maps( - [sched_before_exp, sched_after_exp], - [sched_before, sched_after], + _check_sio_for_stmt_pair( + "insn_c", "insn_d", scheds, + sched_before_seq_exp=sched_before_seq_exp, + sched_after_seq_exp=sched_after_seq_exp, ) @@ -415,19 +361,9 @@ def test_pairwise_schedule_creation_with_hw_par_tags(): # Relationship between stmt_a and stmt_b --------------------------------------- - # Get maps (include schedules, just for test purposes) - ( - sio_seq, (sched_before, sched_after) - ), ( - sio_lconc, (lconc_sched_before, lconc_sched_after) - ), ( - sio_gconc, (gconc_sched_before, gconc_sched_after) - ) = scheds[ - ("stmt_a", "stmt_b")] - # Create expected maps and compare - sched_before_exp = isl.Map( + sched_before_seq_exp = isl.Map( "[pi,pj] -> {[%s=0,i,ii,j,jj] -> [%s] : 0 <= i,ii < pi and 0 <= j,jj < pj}" % ( STATEMENT_VAR_NAME, @@ -438,7 +374,7 @@ def test_pairwise_schedule_creation_with_hw_par_tags(): ) ) - sched_after_exp = isl.Map( + sched_after_seq_exp = isl.Map( "[pi,pj] -> {[%s=1,i,ii,j,jj] -> [%s] : 0 <= i,ii < pi and 0 <= j,jj < pj}" % ( STATEMENT_VAR_NAME, @@ -449,9 +385,10 @@ def test_pairwise_schedule_creation_with_hw_par_tags(): ) ) - _align_and_compare_maps( - [sched_before_exp, sched_after_exp], - [sched_before, sched_after], + _check_sio_for_stmt_pair( + "stmt_a", "stmt_b", scheds, + sched_before_seq_exp=sched_before_seq_exp, + sched_after_seq_exp=sched_after_seq_exp, ) # 
------------------------------------------------------------------------------ @@ -513,40 +450,48 @@ def _check_sio_for_stmt_pair( stmt_id_before, stmt_id_after, sio_dict, - exp_sio_seq=None, - exp_sched_before_seq=None, - exp_sched_after_seq=None, - exp_sio_lconc=None, - exp_sched_before_lconc=None, - exp_sched_after_lconc=None, - exp_sio_gconc=None, - exp_sched_before_gconc=None, - exp_sched_after_gconc=None, + sio_seq_exp=None, + sched_before_seq_exp=None, + sched_after_seq_exp=None, + sio_lconc_exp=None, + sched_before_lconc_exp=None, + sched_after_lconc_exp=None, + sio_gconc_exp=None, + sched_before_gconc_exp=None, + sched_after_gconc_exp=None, ): - from loopy.schedule.checker.utils import ( - ensure_dim_names_match_and_align, - ) - - # Check whether scheds were included - #try: - ( - sio_seq, (sched_before, sched_after) - ), ( - sio_lconc, (sched_before_lconc, sched_after_lconc) - ), ( - sio_gconc, (sched_before_gconc, sched_after_gconc) - ) = sio_dict[ - (stmt_id_before, stmt_id_after)] - #except : - # sio_seq, sio_lconc, sio_gconc = sio_dict[ - # (stmt_id_before, stmt_id_after)] - - # TODO left off here, check all passed maps, - # en eliminate _align_and_comp... 
- sio_seq_aligned = ensure_dim_names_match_and_align(exp_sio_seq, sio_seq) + maps_found = sio_dict[(stmt_id_before, stmt_id_after)] + + # Check whether scheds were included in sio_dict + if isinstance(maps_found[0], tuple): + # Scheds were included + ( + sio_seq, (sched_before_seq, sched_after_seq) + ), ( + sio_lconc, (sched_before_lconc, sched_after_lconc) + ), ( + sio_gconc, (sched_before_gconc, sched_after_gconc) + ) = maps_found + map_candidates = zip([ + sio_seq_exp, sched_before_seq_exp, sched_after_seq_exp, + sio_lconc_exp, sched_before_lconc_exp, sched_after_lconc_exp, + sio_gconc_exp, sched_before_gconc_exp, sched_after_gconc_exp, + ], [ + sio_seq, sched_before_seq, sched_after_seq, + sio_lconc, sched_before_lconc, sched_after_lconc, + sio_gconc, sched_before_gconc, sched_after_gconc, + ]) + else: + # Scheds not included + sio_seq, sio_lconc, sio_gconc = maps_found + map_candidates = zip( + [sio_seq_exp, sio_lconc_exp, sio_gconc_exp, ], + [sio_seq, sio_lconc, sio_gconc, ]) - assert sio_seq_aligned == exp_sio_seq + # Only compare to maps that were passed + maps_to_compare = [(m1, m2) for m1, m2 in map_candidates if m1 is not None] + _align_and_compare_maps(maps_to_compare) def test_statement_instance_ordering(): @@ -609,40 +554,40 @@ def test_statement_instance_ordering(): # Relationship between stmt_a and stmt_b --------------------------------------- - exp_sio_seq = _isl_map_with_marked_dims( + sio_seq_exp = _isl_map_with_marked_dims( "[pi, pj, pk] -> {{ " "[{0}'=0, i', k'] -> [{0}=1, i, j] : " "0 <= i,i' < pi and 0 <= k' < pk and 0 <= j < pj and i >= i' " "}}".format(STATEMENT_VAR_NAME) ) - _check_sio_for_stmt_pair("stmt_a", "stmt_b", scheds, exp_sio_seq=exp_sio_seq) + _check_sio_for_stmt_pair("stmt_a", "stmt_b", scheds, sio_seq_exp=sio_seq_exp) # Relationship between stmt_a and stmt_c --------------------------------------- - exp_sio_seq = _isl_map_with_marked_dims( + sio_seq_exp = _isl_map_with_marked_dims( "[pi, pj, pk] -> {{ " "[{0}'=0, i', k'] -> 
[{0}=1, i, j] : " "0 <= i,i' < pi and 0 <= k' < pk and 0 <= j < pj and i >= i' " "}}".format(STATEMENT_VAR_NAME) ) - _check_sio_for_stmt_pair("stmt_a", "stmt_c", scheds, exp_sio_seq=exp_sio_seq) + _check_sio_for_stmt_pair("stmt_a", "stmt_c", scheds, sio_seq_exp=sio_seq_exp) # Relationship between stmt_a and stmt_d --------------------------------------- - exp_sio_seq = _isl_map_with_marked_dims( + sio_seq_exp = _isl_map_with_marked_dims( "[pt, pi, pk] -> {{ " "[{0}'=0, i', k'] -> [{0}=1, t] : " "0 <= i' < pi and 0 <= k' < pk and 0 <= t < pt " "}}".format(STATEMENT_VAR_NAME) ) - _check_sio_for_stmt_pair("stmt_a", "stmt_d", scheds, exp_sio_seq=exp_sio_seq) + _check_sio_for_stmt_pair("stmt_a", "stmt_d", scheds, sio_seq_exp=sio_seq_exp) # Relationship between stmt_b and stmt_c --------------------------------------- - exp_sio_seq = _isl_map_with_marked_dims( + sio_seq_exp = _isl_map_with_marked_dims( "[pi, pj] -> {{ " "[{0}'=0, i', j'] -> [{0}=1, i, j] : " "0 <= i,i' < pi and 0 <= j,j' < pj and i > i'; " @@ -651,29 +596,29 @@ def test_statement_instance_ordering(): "}}".format(STATEMENT_VAR_NAME) ) - _check_sio_for_stmt_pair("stmt_b", "stmt_c", scheds, exp_sio_seq=exp_sio_seq) + _check_sio_for_stmt_pair("stmt_b", "stmt_c", scheds, sio_seq_exp=sio_seq_exp) # Relationship between stmt_b and stmt_d --------------------------------------- - exp_sio_seq = _isl_map_with_marked_dims( + sio_seq_exp = _isl_map_with_marked_dims( "[pt, pi, pj] -> {{ " "[{0}'=0, i', j'] -> [{0}=1, t] : " "0 <= i' < pi and 0 <= j' < pj and 0 <= t < pt " "}}".format(STATEMENT_VAR_NAME) ) - _check_sio_for_stmt_pair("stmt_b", "stmt_d", scheds, exp_sio_seq=exp_sio_seq) + _check_sio_for_stmt_pair("stmt_b", "stmt_d", scheds, sio_seq_exp=sio_seq_exp) # Relationship between stmt_c and stmt_d --------------------------------------- - exp_sio_seq = _isl_map_with_marked_dims( + sio_seq_exp = _isl_map_with_marked_dims( "[pt, pi, pj] -> {{ " "[{0}'=0, i', j'] -> [{0}=1, t] : " "0 <= i' < pi and 0 <= j' < pj and 
0 <= t < pt " "}}".format(STATEMENT_VAR_NAME) ) - _check_sio_for_stmt_pair("stmt_c", "stmt_d", scheds, exp_sio_seq=exp_sio_seq) + _check_sio_for_stmt_pair("stmt_c", "stmt_d", scheds, sio_seq_exp=sio_seq_exp) def test_statement_instance_ordering_with_hw_par_tags(): @@ -732,7 +677,7 @@ def test_statement_instance_ordering_with_hw_par_tags(): # Relationship between stmt_a and stmt_b --------------------------------------- - exp_sio_seq = _isl_map_with_marked_dims( + sio_seq_exp = _isl_map_with_marked_dims( "[pi, pj] -> {{ " "[{0}'=0, i', ii', j', jj'] -> [{0}=1, i, ii, j, jj] : " "0 <= i,ii,i',ii' < pi and 0 <= j,jj,j',jj' < pj and ii >= ii' " @@ -743,7 +688,7 @@ def test_statement_instance_ordering_with_hw_par_tags(): ) ) - _check_sio_for_stmt_pair("stmt_a", "stmt_b", scheds, exp_sio_seq=exp_sio_seq) + _check_sio_for_stmt_pair("stmt_a", "stmt_b", scheds, sio_seq_exp=sio_seq_exp) # ------------------------------------------------------------------------------ @@ -799,25 +744,18 @@ def test_sios_and_schedules_with_lbarriers(): insn_id_pairs = [("j1", "2"), ("1", "i0")] scheds = get_schedules_for_statement_pairs( - lin_knl, linearization_items, insn_id_pairs, return_schedules=True) + lin_knl, linearization_items, insn_id_pairs, + return_schedules=True, # include schedules for testing + ) # Relationship between j1 and 2 -------------------------------------------- - # Get maps (include schedules, just for test purposes) - ( - sio_seq, (sched_map_before, sched_map_after) - ), ( - sio_lconc, (lconc_sched_before, lconc_sched_after) - ), ( - sio_gconc, (gconc_sched_before, gconc_sched_after) - ) = scheds[("j1", "2")] - # Create expected maps and compare conc_iname_bound_str = "0<=l0,l1,g0 {[%s=0,i,j,l0,l1,g0] -> [%s] : 0<=i,j {[%s=1,l0,l1,g0] -> [%s] : %s}" % ( STATEMENT_VAR_NAME, @@ -857,13 +795,24 @@ def test_sios_and_schedules_with_lbarriers(): ) ) - _align_and_compare_maps( - [lconc_sched_before_exp, lconc_sched_after_exp, sio_lconc_exp], - [lconc_sched_before, 
lconc_sched_after, sio_lconc], + _check_sio_for_stmt_pair( + "j1", "2", scheds, + sio_lconc_exp=sio_lconc_exp, + sched_before_lconc_exp=sched_before_lconc_exp, + sched_after_lconc_exp=sched_after_lconc_exp, ) # Check for some key example pairs in the sio_lconc map + # Get maps + ( + sio_seq, (sched_map_before, sched_map_after) + ), ( + sio_lconc, (sched_before_lconc, sched_after_lconc) + ), ( + sio_gconc, (sched_before_gconc, sched_after_gconc) + ) = scheds[("j1", "2")] + # As long as this is not the last iteration of the i loop, then there # should be a barrier between the last instance of statement j1 # and statement 2: @@ -898,18 +847,9 @@ def test_sios_and_schedules_with_lbarriers(): # Relationship between 1 and i0 -------------------------------------------- - # Get maps (include schedules, just for test purposes) - ( - sio_seq, (sched_map_before, sched_map_after) - ), ( - sio_lconc, (lconc_sched_before, lconc_sched_after) - ), ( - sio_gconc, (gconc_sched_before, gconc_sched_after) - ) = scheds[("1", "i0")] - # Create expected maps and compare - lconc_sched_before_exp = isl.Map( + sched_before_lconc_exp = isl.Map( "[p2] -> {[%s=0,l0,l1,g0] -> [%s] : 0<=l0,l1,g0 {[%s=1,i,j,l0,l1,g0] -> [%s] : 0<=i,j Date: Thu, 25 Mar 2021 19:05:10 -0500 Subject: [PATCH 206/460] insn->stmt --- test/test_linearization_checker.py | 48 +++++++++++++++--------------- 1 file changed, 24 insertions(+), 24 deletions(-) diff --git a/test/test_linearization_checker.py b/test/test_linearization_checker.py index be73e1bb2..e44c5eec5 100644 --- a/test/test_linearization_checker.py +++ b/test/test_linearization_checker.py @@ -100,8 +100,8 @@ def test_pairwise_schedule_creation(): ) # Example kernel - # insn_c depends on insn_b only to create deterministic order - # insn_d depends on insn_c only to create deterministic order + # stmt_c depends on stmt_b only to create deterministic order + # stmt_d depends on stmt_c only to create deterministic order knl = lp.make_kernel( [ "{[i]: 0<=itemp = 
b[i,k] {id=insn_a} + <>temp = b[i,k] {id=stmt_a} end for j - a[i,j] = temp + 1 {id=insn_b,dep=insn_a} - c[i,j] = d[i,j] {id=insn_c,dep=insn_b} + a[i,j] = temp + 1 {id=stmt_b,dep=stmt_a} + c[i,j] = d[i,j] {id=stmt_c,dep=stmt_b} end end for t - e[t] = f[t] {id=insn_d, dep=insn_c} + e[t] = f[t] {id=stmt_d, dep=stmt_c} end """, name="example", @@ -138,12 +138,12 @@ def test_pairwise_schedule_creation(): linearization_items = lin_knl.linearization insn_id_pairs = [ - ("insn_a", "insn_b"), - ("insn_a", "insn_c"), - ("insn_a", "insn_d"), - ("insn_b", "insn_c"), - ("insn_b", "insn_d"), - ("insn_c", "insn_d"), + ("stmt_a", "stmt_b"), + ("stmt_a", "stmt_c"), + ("stmt_a", "stmt_d"), + ("stmt_b", "stmt_c"), + ("stmt_b", "stmt_d"), + ("stmt_c", "stmt_d"), ] scheds = get_schedules_for_statement_pairs( lin_knl, @@ -152,7 +152,7 @@ def test_pairwise_schedule_creation(): return_schedules=True, # include schedules for testing ) - # Relationship between insn_a and insn_b --------------------------------------- + # Relationship between stmt_a and stmt_b --------------------------------------- # Create expected maps and compare @@ -173,13 +173,13 @@ def test_pairwise_schedule_creation(): ) _check_sio_for_stmt_pair( - "insn_a", "insn_b", scheds, + "stmt_a", "stmt_b", scheds, sched_before_seq_exp=sched_before_seq_exp, sched_after_seq_exp=sched_after_seq_exp, ) # ------------------------------------------------------------------------------ - # Relationship between insn_a and insn_c --------------------------------------- + # Relationship between stmt_a and stmt_c --------------------------------------- # Create expected maps and compare @@ -200,13 +200,13 @@ def test_pairwise_schedule_creation(): ) _check_sio_for_stmt_pair( - "insn_a", "insn_c", scheds, + "stmt_a", "stmt_c", scheds, sched_before_seq_exp=sched_before_seq_exp, sched_after_seq_exp=sched_after_seq_exp, ) # ------------------------------------------------------------------------------ - # Relationship between insn_a and 
insn_d --------------------------------------- + # Relationship between stmt_a and stmt_d --------------------------------------- # Create expected maps and compare @@ -227,13 +227,13 @@ def test_pairwise_schedule_creation(): ) _check_sio_for_stmt_pair( - "insn_a", "insn_d", scheds, + "stmt_a", "stmt_d", scheds, sched_before_seq_exp=sched_before_seq_exp, sched_after_seq_exp=sched_after_seq_exp, ) # ------------------------------------------------------------------------------ - # Relationship between insn_b and insn_c --------------------------------------- + # Relationship between stmt_b and stmt_c --------------------------------------- # Create expected maps and compare @@ -254,13 +254,13 @@ def test_pairwise_schedule_creation(): ) _check_sio_for_stmt_pair( - "insn_b", "insn_c", scheds, + "stmt_b", "stmt_c", scheds, sched_before_seq_exp=sched_before_seq_exp, sched_after_seq_exp=sched_after_seq_exp, ) # ------------------------------------------------------------------------------ - # Relationship between insn_b and insn_d --------------------------------------- + # Relationship between stmt_b and stmt_d --------------------------------------- # Create expected maps and compare @@ -281,13 +281,13 @@ def test_pairwise_schedule_creation(): ) _check_sio_for_stmt_pair( - "insn_b", "insn_d", scheds, + "stmt_b", "stmt_d", scheds, sched_before_seq_exp=sched_before_seq_exp, sched_after_seq_exp=sched_after_seq_exp, ) # ------------------------------------------------------------------------------ - # Relationship between insn_c and insn_d --------------------------------------- + # Relationship between stmt_c and stmt_d --------------------------------------- # Create expected maps and compare @@ -308,7 +308,7 @@ def test_pairwise_schedule_creation(): ) _check_sio_for_stmt_pair( - "insn_c", "insn_d", scheds, + "stmt_c", "stmt_d", scheds, sched_before_seq_exp=sched_before_seq_exp, sched_after_seq_exp=sched_after_seq_exp, ) From a029c6719d0d678d017482bc4f1fc2c05c91577a Mon 
Sep 17 00:00:00 2001 From: jdsteve2 Date: Thu, 25 Mar 2021 19:19:02 -0500 Subject: [PATCH 207/460] only increment barrier count if barrier scope matches --- loopy/schedule/checker/schedule.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/loopy/schedule/checker/schedule.py b/loopy/schedule/checker/schedule.py index 5392578f6..974f4051b 100644 --- a/loopy/schedule/checker/schedule.py +++ b/loopy/schedule/checker/schedule.py @@ -384,8 +384,9 @@ def _collect_blex_ordering_info(sync_kind): # Don't increment blex dim val elif isinstance(lin_item, Barrier): - # Increment blex dim val - next_blex_pt[-1] += 1 + # Increment blex dim val if the sync scope matches + if lin_item.synchronization_kind == sync_kind: + next_blex_pt[-1] += 1 else: from loopy.schedule import (CallKernel, ReturnFromKernel) From 264dfda337af0f213f73c5525cd91acc6a054af7 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Thu, 25 Mar 2021 19:32:55 -0500 Subject: [PATCH 208/460] add some tests with global barriers --- test/test_linearization_checker.py | 83 +++++++++++++++++++++++++++++- 1 file changed, 82 insertions(+), 1 deletion(-) diff --git a/test/test_linearization_checker.py b/test/test_linearization_checker.py index e44c5eec5..be67f685a 100644 --- a/test/test_linearization_checker.py +++ b/test/test_linearization_checker.py @@ -697,7 +697,7 @@ def test_statement_instance_ordering_with_hw_par_tags(): # {{{ SIOs and schedules with barriers -def test_sios_and_schedules_with_lbarriers(): +def test_sios_and_schedules_with_barriers(): from loopy.schedule.checker import ( get_schedules_for_statement_pairs, ) @@ -718,6 +718,7 @@ def test_sios_and_schedules_with_lbarriers(): for i <>tempi0 = 0 {id=i0,dep=1} ... lbarrier {id=ib0,dep=i0} + ... 
gbarrier {id=ibb0,dep=i0} <>tempi1 = 0 {id=i1,dep=ib0} <>tempi2 = 0 {id=i2,dep=i1} for j @@ -795,11 +796,52 @@ def test_sios_and_schedules_with_lbarriers(): ) ) + sched_before_gconc_exp = isl.Map( + "[p1,p2] -> {[%s=0,i,j,l0,l1,g0] -> [%s] : 0<=i,j {[%s=1,l0,l1,g0] -> [%s] : %s}" + % ( + STATEMENT_VAR_NAME, + _lex_point_string( + ["2", "0", "0"], + lid_inames=["l0", "l1"], gid_inames=["g0"], + prefix=BLEX_VAR_PREFIX, + ), + conc_iname_bound_str, + ) + ) + + sio_gconc_exp = _isl_map_with_marked_dims( + "[p1,p2] -> {{ " + "[{0}'=0,i',j',l0',l1',g0'] -> [{0}=1,l0,l1,g0] : " + "0 <= i' < p1-1 and 0 <= j' < p1 " # not last iteration of j + "and {1} and {2}" # conc iname bounds + "}}".format( + STATEMENT_VAR_NAME, + conc_iname_bound_str, + conc_iname_bound_str_p, + ) + ) + _check_sio_for_stmt_pair( "j1", "2", scheds, sio_lconc_exp=sio_lconc_exp, sched_before_lconc_exp=sched_before_lconc_exp, sched_after_lconc_exp=sched_after_lconc_exp, + sio_gconc_exp=sio_gconc_exp, + sched_before_gconc_exp=sched_before_gconc_exp, + sched_after_gconc_exp=sched_after_gconc_exp, ) # Check for some key example pairs in the sio_lconc map @@ -886,11 +928,50 @@ def test_sios_and_schedules_with_lbarriers(): ) ) + sched_before_gconc_exp = isl.Map( + "[p2] -> {[%s=0,l0,l1,g0] -> [%s] : 0<=l0,l1,g0 {[%s=1,i,j,l0,l1,g0] -> [%s] : 0<=i,j {{ " + "[{0}'=0,l0',l1',g0'] -> [{0}=1,i,j,l0,l1,g0] : " + "1 <= i < p1 and 0 <= j < p1 " # not first iteration of i + "and {1} and {2}" # conc iname bounds + "}}".format( + STATEMENT_VAR_NAME, + conc_iname_bound_str, + conc_iname_bound_str_p, + ) + ) + _check_sio_for_stmt_pair( "1", "i0", scheds, sio_lconc_exp=sio_lconc_exp, sched_before_lconc_exp=sched_before_lconc_exp, sched_after_lconc_exp=sched_after_lconc_exp, + sio_gconc_exp=sio_gconc_exp, + sched_before_gconc_exp=sched_before_gconc_exp, + sched_after_gconc_exp=sched_after_gconc_exp, ) # }}} From d49333fe5796145aeab9e392f52c4e6ef4b4c958 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Thu, 25 Mar 2021 21:22:43 
-0500 Subject: [PATCH 209/460] use variable bounds in parallel+barrier sio/sched test; clean up expected map syntax with pre-made iname bounds strings --- test/test_linearization_checker.py | 154 +++++++++++++++++++---------- 1 file changed, 101 insertions(+), 53 deletions(-) diff --git a/test/test_linearization_checker.py b/test/test_linearization_checker.py index be67f685a..1d0bc5748 100644 --- a/test/test_linearization_checker.py +++ b/test/test_linearization_checker.py @@ -702,11 +702,11 @@ def test_sios_and_schedules_with_barriers(): get_schedules_for_statement_pairs, ) + assumptions = "ij_end >= ij_start + 1 and lg_end >= 1" knl = lp.make_kernel( [ - #"{[i,j,l0,l1,g0]: 0<=i,j,l0,l1,g0 {[%s=0,i,j,l0,l1,g0] -> [%s] : 0<=i,j {" + "[%s=0, i, j, l0, l1, g0] -> [%s] : " + "%s and %s}" # iname bounds % ( STATEMENT_VAR_NAME, _lex_point_string( - ["2", "i", "2", "j", "1"], + ["2", "i", "2", "j", "1"], # lex points lid_inames=["l0", "l1"], gid_inames=["g0"], prefix=BLEX_VAR_PREFIX, ), + iname_bound_str, conc_iname_bound_str, ) ) sched_after_lconc_exp = isl.Map( - "[p2] -> {[%s=1,l0,l1,g0] -> [%s] : %s}" + "[lg_end] -> {[%s=1, l0, l1, g0] -> [%s] : %s}" % ( STATEMENT_VAR_NAME, _lex_point_string( - ["3", "0", "0", "0", "0"], + ["3", "0", "0", "0", "0"], # lex points lid_inames=["l0", "l1"], gid_inames=["g0"], prefix=BLEX_VAR_PREFIX, ), @@ -783,38 +789,45 @@ def test_sios_and_schedules_with_barriers(): ) sio_lconc_exp = _isl_map_with_marked_dims( - "[p1,p2] -> {{ " - "[{0}'=0,i',j',l0',l1',g0'] -> [{0}=1,l0,l1,g0] : " - "((0 <= i' < p1 and 0 <= j' < p1-1) or " # not last iteration of j - " (0 <= i' < p1-1 and 0 <= j' < p1))" # not last iteration of i + "[ij_start, ij_end, lg_end] -> {{ " + "[{0}'=0, i', j', l0', l1', g0'] -> [{0}=1, l0, l1, g0] : " + "(ij_start <= j' < ij_end-1 or " # not last iteration of j + " ij_start <= i' < ij_end-1) " # not last iteration of i "and g0 = g0' " # within a single group - "and {1} and {2}" # conc iname bounds + "and {1} and {2} and {3} 
" # iname bounds + "and {4}" # param assumptions "}}".format( STATEMENT_VAR_NAME, + iname_bound_str_p, conc_iname_bound_str, conc_iname_bound_str_p, + assumptions, ) ) sched_before_gconc_exp = isl.Map( - "[p1,p2] -> {[%s=0,i,j,l0,l1,g0] -> [%s] : 0<=i,j {" + "[%s=0, i, j, l0, l1, g0] -> [%s] : " + "%s and %s}" # iname bounds % ( STATEMENT_VAR_NAME, _lex_point_string( - ["1", "i", "1"], + ["1", "i", "1"], # lex points lid_inames=["l0", "l1"], gid_inames=["g0"], prefix=BLEX_VAR_PREFIX, ), + iname_bound_str, conc_iname_bound_str, ) ) sched_after_gconc_exp = isl.Map( - "[p2] -> {[%s=1,l0,l1,g0] -> [%s] : %s}" + "[lg_end] -> {[%s=1, l0, l1, g0] -> [%s] : " + "%s}" # iname bounds % ( STATEMENT_VAR_NAME, _lex_point_string( - ["2", "0", "0"], + ["2", "0", "0"], # lex points lid_inames=["l0", "l1"], gid_inames=["g0"], prefix=BLEX_VAR_PREFIX, ), @@ -823,14 +836,17 @@ def test_sios_and_schedules_with_barriers(): ) sio_gconc_exp = _isl_map_with_marked_dims( - "[p1,p2] -> {{ " - "[{0}'=0,i',j',l0',l1',g0'] -> [{0}=1,l0,l1,g0] : " - "0 <= i' < p1-1 and 0 <= j' < p1 " # not last iteration of j - "and {1} and {2}" # conc iname bounds + "[ij_start,ij_end,lg_end] -> {{ " + "[{0}'=0, i', j', l0', l1', g0'] -> [{0}=1, l0, l1, g0] : " + "ij_start <= i' < ij_end-1 " # not last iteration of i + "and {1} and {2} and {3} " # iname bounds + "and {4}" # param assumptions "}}".format( STATEMENT_VAR_NAME, + iname_bound_str_p, conc_iname_bound_str, conc_iname_bound_str_p, + assumptions, ) ) @@ -858,17 +874,24 @@ def test_sios_and_schedules_with_barriers(): # As long as this is not the last iteration of the i loop, then there # should be a barrier between the last instance of statement j1 # and statement 2: - p1_val = 7 - last_i_val = p1_val - 1 + ij_end_val = 7 + last_i_val = ij_end_val - 1 max_non_last_i_val = last_i_val - 1 # max i val that isn't the last iteration wanted_pairs = _isl_map_with_marked_dims( - "[p1,p2] -> {{" - "[{0}' = 0, i', j'=p1-1, g0', l0', l1'] -> [{0} = 1, l0, l1, g0] : 
" - "0 <= i' <= {1} and " # constrain i - "p1 >= {2} and " # constrain p - "0<=l0',l1',g0',l0,l1,g0 {{" + "[{0}' = 0, i', j'=ij_end-1, g0', l0', l1'] -> [{0} = 1, l0, l1, g0] : " + "ij_start <= i' <= {1} " # constrain i + "and ij_end >= {2} " # constrain ij_end + "and g0 = g0' " # within a single group + "and {3} and {4} " # conc iname bounds + "}}".format( + STATEMENT_VAR_NAME, + max_non_last_i_val, + ij_end_val, + conc_iname_bound_str, + conc_iname_bound_str_p, + )) wanted_pairs = ensure_dim_names_match_and_align(wanted_pairs, sio_lconc) assert wanted_pairs.is_subset(sio_lconc) @@ -877,12 +900,19 @@ def test_sios_and_schedules_with_barriers(): # should NOT be a barrier between the last instance of statement j1 # and statement 2: unwanted_pairs = _isl_map_with_marked_dims( - "[p1,p2] -> {{" - "[{0}' = 0, i', j'=p1-1, g0', l0', l1'] -> [{0} = 1, l0, l1, g0] : " - "0 <= i' <= {1} and " # constrain i - "p1 >= {2} and " # constrain p - "0<=l0',l1',g0',l0,l1,g0 {{" + "[{0}' = 0, i', j'=ij_end-1, g0', l0', l1'] -> [{0} = 1, l0, l1, g0] : " + "ij_start <= i' <= {1} " # constrain i + "and ij_end >= {2} " # constrain p + "and g0 = g0' " # within a single group + "and {3} and {4} " # conc iname bounds + "}}".format( + STATEMENT_VAR_NAME, + last_i_val, + ij_end_val, + conc_iname_bound_str, + conc_iname_bound_str_p, + )) unwanted_pairs = ensure_dim_names_match_and_align(unwanted_pairs, sio_lconc) assert not unwanted_pairs.is_subset(sio_lconc) @@ -892,75 +922,93 @@ def test_sios_and_schedules_with_barriers(): # Create expected maps and compare sched_before_lconc_exp = isl.Map( - "[p2] -> {[%s=0,l0,l1,g0] -> [%s] : 0<=l0,l1,g0 {[%s=0, l0, l1, g0] -> [%s] : " + "%s}" # iname bounds % ( STATEMENT_VAR_NAME, _lex_point_string( - ["1", "0", "0", "0", "0"], + ["1", "0", "0", "0", "0"], # lex points lid_inames=["l0", "l1"], gid_inames=["g0"], prefix=BLEX_VAR_PREFIX, ), + conc_iname_bound_str, ) ) sched_after_lconc_exp = isl.Map( - "[p1,p2] -> {[%s=1,i,j,l0,l1,g0] -> [%s] : 0<=i,j {" + 
"[%s=1, i, j, l0, l1, g0] -> [%s] : " + "%s and %s}" # iname bounds % ( STATEMENT_VAR_NAME, _lex_point_string( - ["2", "i", "0", "0", "0"], + ["2", "i", "0", "0", "0"], # lex points lid_inames=["l0", "l1"], gid_inames=["g0"], prefix=BLEX_VAR_PREFIX, ), + iname_bound_str, + conc_iname_bound_str, ) ) sio_lconc_exp = _isl_map_with_marked_dims( - "[p1,p2] -> {{ " - "[{0}'=0,l0',l1',g0'] -> [{0}=1,i,j,l0,l1,g0] : " - "1 <= i < p1 and 0 <= j < p1 " # not first iteration of i + "[ij_start, ij_end, lg_end] -> {{ " + "[{0}'=0, l0', l1', g0'] -> [{0}=1, i, j, l0, l1, g0] : " + "ij_start + 1 <= i < ij_end " # not first iteration of i "and g0 = g0' " # within a single group - "and {1} and {2}" # conc iname bounds + "and {1} and {2} and {3} " # iname bounds + "and {4}" # param assumptions "}}".format( STATEMENT_VAR_NAME, + iname_bound_str, conc_iname_bound_str, conc_iname_bound_str_p, + assumptions, ) ) sched_before_gconc_exp = isl.Map( - "[p2] -> {[%s=0,l0,l1,g0] -> [%s] : 0<=l0,l1,g0 {[%s=0, l0, l1, g0] -> [%s] : " + "%s}" # iname bounds % ( STATEMENT_VAR_NAME, _lex_point_string( - ["0", "0", "0"], + ["0", "0", "0"], # lex points lid_inames=["l0", "l1"], gid_inames=["g0"], prefix=BLEX_VAR_PREFIX, ), + conc_iname_bound_str, ) ) sched_after_gconc_exp = isl.Map( - "[p1,p2] -> {[%s=1,i,j,l0,l1,g0] -> [%s] : 0<=i,j {" + "[%s=1, i, j, l0, l1, g0] -> [%s] : " + "%s and %s}" # iname bounds % ( STATEMENT_VAR_NAME, _lex_point_string( - ["1", "i", "0"], + ["1", "i", "0"], # lex points lid_inames=["l0", "l1"], gid_inames=["g0"], prefix=BLEX_VAR_PREFIX, ), + iname_bound_str, + conc_iname_bound_str, ) ) sio_gconc_exp = _isl_map_with_marked_dims( - "[p1,p2] -> {{ " - "[{0}'=0,l0',l1',g0'] -> [{0}=1,i,j,l0,l1,g0] : " - "1 <= i < p1 and 0 <= j < p1 " # not first iteration of i - "and {1} and {2}" # conc iname bounds + "[ij_start, ij_end, lg_end] -> {{ " + "[{0}'=0, l0', l1', g0'] -> [{0}=1, i, j, l0, l1, g0] : " + "ij_start + 1 <= i < ij_end " # not first iteration of i + "and {1} and {2} and 
{3} " # iname bounds + "and {4}" # param assumptions "}}".format( STATEMENT_VAR_NAME, + iname_bound_str, conc_iname_bound_str, conc_iname_bound_str_p, + assumptions, ) ) From 7cf548898c8920359b9947f029e8e6beb5b7cc6c Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Thu, 25 Mar 2021 21:32:19 -0500 Subject: [PATCH 210/460] update doctest --- loopy/schedule/checker/__init__.py | 21 ++++++++------------- 1 file changed, 8 insertions(+), 13 deletions(-) diff --git a/loopy/schedule/checker/__init__.py b/loopy/schedule/checker/__init__.py index f2caec55f..904f2f867 100644 --- a/loopy/schedule/checker/__init__.py +++ b/loopy/schedule/checker/__init__.py @@ -61,35 +61,30 @@ def get_schedules_for_statement_pairs( >>> import numpy as np >>> # Make kernel ----------------------------------------------------------- >>> knl = lp.make_kernel( - ... "{[i,j,k]: 0<=i>> knl = lp.add_and_infer_dtypes(knl, {"a": np.float32, "b": np.float32}) - >>> knl = lp.prioritize_loops(knl, "i,j") - >>> knl = lp.prioritize_loops(knl, "i,k") >>> # Get a linearization >>> knl = lp.get_one_linearized_kernel(lp.preprocess_kernel(knl)) >>> # Get a pairwise schedule ----------------------------------------------- >>> from loopy.schedule.checker import get_schedules_for_statement_pairs >>> # Get two maps ---------------------------------------------------------- - >>> schedules = get_schedules_for_statement_pairs( + >>> sio_dict = get_schedules_for_statement_pairs( ... knl, ... knl.linearization, ... [("insn_a", "insn_b")], ... ) - >>> # Print maps + >>> # Print map >>> print("\n".join( ... str(m).replace("{ ", "{\n").replace(" :", "\n:") - ... for m in schedules[("insn_a", "insn_b")][0] + ... for m in sio_dict[("insn_a", "insn_b")][0] ... 
)) [pi, pj, pk] -> { - [_lp_linchk_stmt = 0, i, j, k] -> [_lp_linchk_lex0 = i, _lp_linchk_lex1 = 0] - : 0 <= i < pi and 0 <= j < pj and 0 <= k < pk } - [pi, pj, pk] -> { - [_lp_linchk_stmt = 1, i, j, k] -> [_lp_linchk_lex0 = i, _lp_linchk_lex1 = 1] - : 0 <= i < pi and 0 <= j < pj and 0 <= k < pk } + [_lp_linchk_stmt' = 0, j', k'] -> [_lp_linchk_stmt = 1, j, k] + : 0 <= j < pj and 0 <= k < pk and 0 <= j' < pj and 0 <= k' < pk } """ # TODO update docs and docstring now that we're returning SIOs From e92401b0d09ac69d9b22ce187dff61e0bc5ca476 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Fri, 26 Mar 2021 15:07:59 -0500 Subject: [PATCH 211/460] fix doctest (?) --- loopy/schedule/checker/__init__.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/loopy/schedule/checker/__init__.py b/loopy/schedule/checker/__init__.py index 904f2f867..e99658cd1 100644 --- a/loopy/schedule/checker/__init__.py +++ b/loopy/schedule/checker/__init__.py @@ -78,16 +78,14 @@ def get_schedules_for_statement_pairs( ... [("insn_a", "insn_b")], ... ) >>> # Print map - >>> print("\n".join( - ... str(m).replace("{ ", "{\n").replace(" :", "\n:") - ... for m in sio_dict[("insn_a", "insn_b")][0] - ... )) + >>> print(str(sio_dict[("insn_a", "insn_b")][0] + ... 
).replace("{ ", "{\n").replace(" :", "\n:")) [pi, pj, pk] -> { [_lp_linchk_stmt' = 0, j', k'] -> [_lp_linchk_stmt = 1, j, k] : 0 <= j < pj and 0 <= k < pk and 0 <= j' < pj and 0 <= k' < pk } """ - # TODO update docs and docstring now that we're returning SIOs + # TODO update docs and doctest now that we're returning SIOs # {{{ make sure kernel has been preprocessed From 6a4d64703a089b1d645ef39ed1395c03f42b589c Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Fri, 26 Mar 2021 15:08:41 -0500 Subject: [PATCH 212/460] use const class for special blex strings --- loopy/schedule/checker/schedule.py | 73 ++++++++++++++++++++++-------- 1 file changed, 54 insertions(+), 19 deletions(-) diff --git a/loopy/schedule/checker/schedule.py b/loopy/schedule/checker/schedule.py index 974f4051b..0395172d9 100644 --- a/loopy/schedule/checker/schedule.py +++ b/loopy/schedule/checker/schedule.py @@ -53,12 +53,6 @@ for par_level in [0, 1, 2]: GTAG_VAR_NAMES.append("%sgid%d" % (LIN_CHECK_IDENTIFIER_PREFIX, par_level)) LTAG_VAR_NAMES.append("%slid%d" % (LIN_CHECK_IDENTIFIER_PREFIX, par_level)) -PRE = "pre" -FIRST = "first" -TOP = "top" -BOTTOM = "bottom" -LAST = "last" -POST = "post" # TODO document new vars @@ -120,6 +114,43 @@ def _simplify_lex_dims(tup0, tup1): return tuple(new_tup0), tuple(new_tup1) +class SpecialLexPointWRTLoop: + """Strings specifying a particular position in a lexicographic + ordering of statements relative to a loop. + + .. attribute:: PRE + A :class:`str` indicating the last lexicographic point that + precedes the loop. + + .. attribute:: FIRST + A :class:`str` indicating the first lexicographic point in the + first loop iteration (i.e., with the iname set to its min. val). + + .. attribute:: TOP + A :class:`str` indicating the first lexicographic point in + an arbitrary loop iteration. + + .. attribute:: BOTTOM + A :class:`str` indicating the last lexicographic point in + an arbitrary loop iteration. + + .. 
attribute:: LAST + A :class:`str` indicating the last lexicographic point in the + last loop iteration (i.e., with the iname set to its max val). + + .. attribute:: POST + A :class:`str` indicating the first lexicographic point that + follows the loop. + """ + + PRE = "pre" + FIRST = "first" + TOP = "top" + BOTTOM = "bottom" + LAST = "last" + POST = "post" + + def generate_pairwise_schedules( knl, lin_items, @@ -171,6 +202,7 @@ def generate_pairwise_schedules( append_marker_to_strings, add_eq_isl_constraint_from_names, ) + slex = SpecialLexPointWRTLoop all_insn_ids = set().union(*insn_id_pairs) @@ -346,9 +378,9 @@ def _collect_blex_ordering_info(sync_kind): first_iter_blex_pt = next_blex_pt[:] first_iter_blex_pt[-2] = lbound blex_exclusion_info[enter_iname] = { - PRE: tuple(pre_loop_blex_pt), # make sure to copy - TOP: tuple(next_blex_pt), # make sure to copy - FIRST: tuple(first_iter_blex_pt), # make sure to copy + slex.PRE: tuple(pre_loop_blex_pt), # make sure to copy + slex.TOP: tuple(next_blex_pt), # make sure to copy + slex.FIRST: tuple(first_iter_blex_pt), # make sure to copy } blex_map_params |= set(lbound.get_var_names(dt.param)) @@ -371,10 +403,12 @@ def _collect_blex_ordering_info(sync_kind): ubound = iname_bounds_pwaff[leave_iname][1] last_iter_blex_pt = pre_end_loop_blex_pt[:] last_iter_blex_pt[-2] = ubound - blex_exclusion_info[leave_iname][BOTTOM] = tuple( + blex_exclusion_info[leave_iname][slex.BOTTOM] = tuple( pre_end_loop_blex_pt) - blex_exclusion_info[leave_iname][LAST] = tuple(last_iter_blex_pt) - blex_exclusion_info[leave_iname][POST] = tuple(next_blex_pt) + blex_exclusion_info[leave_iname][slex.LAST] = tuple( + last_iter_blex_pt) + blex_exclusion_info[leave_iname][slex.POST] = tuple( + next_blex_pt) # (make sure ^these are copies) blex_map_params |= set(ubound.get_var_names(dt.param)) @@ -447,7 +481,7 @@ def _collect_blex_ordering_info(sync_kind): blex_set_affs = isl.affs_from_space(blex_set_template.space) def 
_create_excluded_map_for_iname(iname, blueprint): - # Note: blueprint[FIRST] and blueprint[LAST] contain pwaffs + # Note: blueprint[slex.FIRST] and blueprint[slex.LAST] contain pwaffs def _create_blex_set_from_tuple_pair(before, after, wrap_cond=False): @@ -492,16 +526,16 @@ def _create_blex_set_from_tuple_pair(before, after, wrap_cond=False): # enter loop case full_blex_set = _create_blex_set_from_tuple_pair( - blueprint[PRE], blueprint[FIRST]) + blueprint[slex.PRE], blueprint[slex.FIRST]) # wrap loop case full_blex_set |= _create_blex_set_from_tuple_pair( - blueprint[BOTTOM], blueprint[TOP], wrap_cond=True) + blueprint[slex.BOTTOM], blueprint[slex.TOP], wrap_cond=True) # leave loop case full_blex_set |= _create_blex_set_from_tuple_pair( - blueprint[LAST], blueprint[POST]) + blueprint[slex.LAST], blueprint[slex.POST]) # add cond to fix iteration value for surrounding loops (i = i') - for surrounding_iname in blueprint[PRE][1::2]: + for surrounding_iname in blueprint[slex.PRE][1::2]: s_blex_var = iname_to_blex_var[surrounding_iname] full_blex_set &= blex_set_affs[s_blex_var].eq_set( blex_set_affs[s_blex_var+BEFORE_MARK]) @@ -717,15 +751,16 @@ def _get_map_for_stmt( # }}} - # TODO have option to return sched maps, but default to not returning them - #pairwise_schedules[tuple(insn_ids)] = tuple(intra_thread_sched_maps) if return_schedules: + # Store sched maps along with SIOs + # (currently helpful for testing; also could be desired by a user) pairwise_schedules[tuple(insn_ids)] = ( (sio_seq, tuple(intra_thread_sched_maps), ), (sio_lconc, tuple(lconc_sched_maps), ), (sio_gconc, tuple(gconc_sched_maps), ), ) else: + # Store SIOs pairwise_schedules[tuple(insn_ids)] = (sio_seq, sio_lconc, sio_gconc) return pairwise_schedules From dfb7b116ad82b7702bd7b2dd4b1faffac52121e7 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Fri, 26 Mar 2021 15:12:38 -0500 Subject: [PATCH 213/460] use same map prefix for blex/lex vars --- loopy/schedule/checker/schedule.py | 3 +-- 
test/test_linearization_checker.py | 13 ++----------- 2 files changed, 3 insertions(+), 13 deletions(-) diff --git a/loopy/schedule/checker/schedule.py b/loopy/schedule/checker/schedule.py index 0395172d9..b3bedb105 100644 --- a/loopy/schedule/checker/schedule.py +++ b/loopy/schedule/checker/schedule.py @@ -45,7 +45,6 @@ LIN_CHECK_IDENTIFIER_PREFIX = "_lp_linchk_" LEX_VAR_PREFIX = "%slex" % (LIN_CHECK_IDENTIFIER_PREFIX) -BLEX_VAR_PREFIX = "%sblex" % (LIN_CHECK_IDENTIFIER_PREFIX) STATEMENT_VAR_NAME = "%sstmt" % (LIN_CHECK_IDENTIFIER_PREFIX) BEFORE_MARK = "'" GTAG_VAR_NAMES = [] @@ -441,7 +440,7 @@ def _collect_blex_ordering_info(sync_kind): # Create names for the blex dimensions for sequential loops seq_blex_dim_names = [ - BLEX_VAR_PREFIX+str(i) for i in range(n_seq_blex_dims)] + LEX_VAR_PREFIX+str(i) for i in range(n_seq_blex_dims)] seq_blex_dim_names_prime = append_marker_to_strings( seq_blex_dim_names, marker=BEFORE_MARK) diff --git a/test/test_linearization_checker.py b/test/test_linearization_checker.py index 1d0bc5748..38e9309ff 100644 --- a/test/test_linearization_checker.py +++ b/test/test_linearization_checker.py @@ -38,7 +38,6 @@ ) from loopy.schedule.checker.schedule import ( LEX_VAR_PREFIX, - BLEX_VAR_PREFIX, STATEMENT_VAR_NAME, LTAG_VAR_NAMES, GTAG_VAR_NAMES, @@ -66,13 +65,13 @@ def _align_and_compare_maps(maps): assert map1_aligned == map2 -def _lex_point_string(dim_vals, lid_inames=[], gid_inames=[], prefix=LEX_VAR_PREFIX): +def _lex_point_string(dim_vals, lid_inames=[], gid_inames=[]): # Return a string describing a point in a lex space # by assigning values to lex dimension variables # (used to create maps below) return ", ".join( - ["%s%d=%s" % (prefix, idx, str(val)) + ["%s%d=%s" % (LEX_VAR_PREFIX, idx, str(val)) for idx, val in enumerate(dim_vals)] + ["%s=%s" % (LTAG_VAR_NAMES[idx], iname) for idx, iname in enumerate(lid_inames)] + @@ -768,7 +767,6 @@ def test_sios_and_schedules_with_barriers(): _lex_point_string( ["2", "i", "2", "j", "1"], # 
lex points lid_inames=["l0", "l1"], gid_inames=["g0"], - prefix=BLEX_VAR_PREFIX, ), iname_bound_str, conc_iname_bound_str, @@ -782,7 +780,6 @@ def test_sios_and_schedules_with_barriers(): _lex_point_string( ["3", "0", "0", "0", "0"], # lex points lid_inames=["l0", "l1"], gid_inames=["g0"], - prefix=BLEX_VAR_PREFIX, ), conc_iname_bound_str, ) @@ -814,7 +811,6 @@ def test_sios_and_schedules_with_barriers(): _lex_point_string( ["1", "i", "1"], # lex points lid_inames=["l0", "l1"], gid_inames=["g0"], - prefix=BLEX_VAR_PREFIX, ), iname_bound_str, conc_iname_bound_str, @@ -829,7 +825,6 @@ def test_sios_and_schedules_with_barriers(): _lex_point_string( ["2", "0", "0"], # lex points lid_inames=["l0", "l1"], gid_inames=["g0"], - prefix=BLEX_VAR_PREFIX, ), conc_iname_bound_str, ) @@ -929,7 +924,6 @@ def test_sios_and_schedules_with_barriers(): _lex_point_string( ["1", "0", "0", "0", "0"], # lex points lid_inames=["l0", "l1"], gid_inames=["g0"], - prefix=BLEX_VAR_PREFIX, ), conc_iname_bound_str, ) @@ -944,7 +938,6 @@ def test_sios_and_schedules_with_barriers(): _lex_point_string( ["2", "i", "0", "0", "0"], # lex points lid_inames=["l0", "l1"], gid_inames=["g0"], - prefix=BLEX_VAR_PREFIX, ), iname_bound_str, conc_iname_bound_str, @@ -975,7 +968,6 @@ def test_sios_and_schedules_with_barriers(): _lex_point_string( ["0", "0", "0"], # lex points lid_inames=["l0", "l1"], gid_inames=["g0"], - prefix=BLEX_VAR_PREFIX, ), conc_iname_bound_str, ) @@ -990,7 +982,6 @@ def test_sios_and_schedules_with_barriers(): _lex_point_string( ["1", "i", "0"], # lex points lid_inames=["l0", "l1"], gid_inames=["g0"], - prefix=BLEX_VAR_PREFIX, ), iname_bound_str, conc_iname_bound_str, From ca9a8f0c29f6de7818da1ff0a32f7bd18793fd6a Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Fri, 26 Mar 2021 15:34:09 -0500 Subject: [PATCH 214/460] update docs for consts --- loopy/schedule/checker/schedule.py | 39 +++++++++++++++++++++--------- 1 file changed, 28 insertions(+), 11 deletions(-) diff --git 
a/loopy/schedule/checker/schedule.py b/loopy/schedule/checker/schedule.py index b3bedb105..adfddea93 100644 --- a/loopy/schedule/checker/schedule.py +++ b/loopy/schedule/checker/schedule.py @@ -27,32 +27,49 @@ .. data:: LIN_CHECK_IDENTIFIER_PREFIX - The prefix for identifiers involved in linearization checking. + The :class:`str` prefix for identifiers involved in linearization + checking. .. data:: LEX_VAR_PREFIX - E.g., a prefix of ``_lp_linchk_lex`` might yield lexicographic dimension - variables ``_lp_linchk_lex0``, ``_lp_linchk_lex1``, ``_lp_linchk_lex2``. Cf. - :ref:`reserved-identifiers`. + The :class:`str` prefix for the variables representing the + dimensions in the lexicographic ordering used in a pairwise schedule. E.g., + a prefix of ``_lp_linchk_lex`` might yield lexicographic dimension + variables ``_lp_linchk_lex0``, ``_lp_linchk_lex1``, ``_lp_linchk_lex2``. + Cf. :ref:`reserved-identifiers`. .. data:: STATEMENT_VAR_NAME - Set the :class:`str` specifying the prefix to be used for the variables - representing the dimensions in the lexicographic ordering used in a - pairwise schedule. + The :class:`str` name for the statement-identifying dimension of maps + representing schedules and statement instance orderings. + +.. data:: LTAG_VAR_NAME + + An array of :class:`str` names for map dimensions carrying values for local + (intra work-group) thread identifiers in maps representing schedules and + statement instance orderings. + +.. data:: GTAG_VAR_NAME + + An array of :class:`str` names for map dimensions carrying values for group + identifiers in maps representing schedules and statement instance orderings. + +.. data:: BEFORE_MARK + + The :class:`str` identifier to be appended to input dimension names in + maps representing schedules and statement instance orderings. 
""" LIN_CHECK_IDENTIFIER_PREFIX = "_lp_linchk_" LEX_VAR_PREFIX = "%slex" % (LIN_CHECK_IDENTIFIER_PREFIX) STATEMENT_VAR_NAME = "%sstmt" % (LIN_CHECK_IDENTIFIER_PREFIX) -BEFORE_MARK = "'" -GTAG_VAR_NAMES = [] LTAG_VAR_NAMES = [] +GTAG_VAR_NAMES = [] for par_level in [0, 1, 2]: - GTAG_VAR_NAMES.append("%sgid%d" % (LIN_CHECK_IDENTIFIER_PREFIX, par_level)) LTAG_VAR_NAMES.append("%slid%d" % (LIN_CHECK_IDENTIFIER_PREFIX, par_level)) -# TODO document new vars + GTAG_VAR_NAMES.append("%sgid%d" % (LIN_CHECK_IDENTIFIER_PREFIX, par_level)) +BEFORE_MARK = "'" def _pad_tuple_with_zeros(tup, desired_length): From 5b52894ca9c4f67c19436d98f30cf9e0c8d4b323 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Fri, 26 Mar 2021 16:46:11 -0500 Subject: [PATCH 215/460] combine instruction pass that determine which loops contain barriers with pass that computes the intra-thread schedule --- loopy/schedule/checker/schedule.py | 91 ++++++++++++++++-------------- 1 file changed, 48 insertions(+), 43 deletions(-) diff --git a/loopy/schedule/checker/schedule.py b/loopy/schedule/checker/schedule.py index adfddea93..ac2429bb1 100644 --- a/loopy/schedule/checker/schedule.py +++ b/loopy/schedule/checker/schedule.py @@ -206,6 +206,8 @@ def generate_pairwise_schedules( each of the two statements. """ # TODO update docs now that we're returning SIOs + # TODO rename loops_to_ignore to loops_to_ignore_for_intra_thread_stuff... + # TODO handle 'vec' appropriately; then remove loops_to_ignore? 
from loopy.schedule import (EnterLoop, LeaveLoop, Barrier, RunInstruction) from loopy.kernel.data import (LocalIndexTag, GroupIndexTag) @@ -222,22 +224,30 @@ def generate_pairwise_schedules( all_insn_ids = set().union(*insn_id_pairs) - # First, use one pass through lin_items to generate a lexicographic - # ordering describing the relative order of *all* statements represented by - # all_insn_ids + # First, use one pass through lin_items to generate an *intra-work-item* + # lexicographic ordering describing the relative order of all statements + # represented by all_insn_ids # For each statement, map the insn_id to a tuple representing points - # in the lexicographic ordering containing items of :class:`int` or - # :class:`str` :mod:`loopy` inames. + # in the intra-group lexicographic ordering containing items of :class:`int` or + # :class:`str` :mod:`loopy` inames stmt_inst_to_lex = {} # Keep track of the next tuple of points in our lexicographic # ordering, initially this as a 1-d point with value 0 next_insn_lex_tuple = [0] + # While we're passing through, determine which loops contain barriers, + # this information will be used later when creating *intra-group* and + # *global* lexicographic orderings + loops_with_barriers = {"local": set(), "global": set()} + current_inames = set() + for lin_item in lin_items: if isinstance(lin_item, EnterLoop): iname = lin_item.iname + current_inames.add(iname) + if iname in loops_to_ignore: continue @@ -254,7 +264,10 @@ def generate_pairwise_schedules( next_insn_lex_tuple.append(0) elif isinstance(lin_item, LeaveLoop): - if lin_item.iname in loops_to_ignore: + iname = lin_item.iname + current_inames.remove(iname) + + if iname in loops_to_ignore: continue # Upon leaving a loop, @@ -271,15 +284,22 @@ def generate_pairwise_schedules( # in the simplification step below) next_insn_lex_tuple[-1] += 1 - elif isinstance(lin_item, (RunInstruction, Barrier)): - from loopy.schedule.checker.utils import ( - 
get_insn_id_from_linearization_item, - ) - lp_insn_id = get_insn_id_from_linearization_item(lin_item) + elif isinstance(lin_item, RunInstruction): + lp_insn_id = lin_item.insn_id - if lp_insn_id is None: - assert isinstance(lin_item, Barrier) + # Only process listed insns, otherwise ignore + if lp_insn_id in all_insn_ids: + # Add item to stmt_inst_to_lex + stmt_inst_to_lex[lp_insn_id] = tuple(next_insn_lex_tuple) + + # Increment lex dim val enumerating items in current section of code + next_insn_lex_tuple[-1] += 1 + + elif isinstance(lin_item, Barrier): + lp_insn_id = lin_item.originating_insn_id + loops_with_barriers[lin_item.synchronization_kind] |= current_inames + if lp_insn_id is None: # Barriers without insn ids were inserted as a result of a # dependency. They don't themselves have dependencies. Ignore them. @@ -289,7 +309,7 @@ def generate_pairwise_schedules( continue - # Only process listed insns, otherwise ignore + # If barrier was identified in listed insns, process it if lp_insn_id in all_insn_ids: # Add item to stmt_inst_to_lex stmt_inst_to_lex[lp_insn_id] = tuple(next_insn_lex_tuple) @@ -304,55 +324,40 @@ def generate_pairwise_schedules( lin_item, (CallKernel, ReturnFromKernel)) pass - # To save time, stop when we've found all statements - if len(stmt_inst_to_lex.keys()) == len(all_insn_ids): - # TODO if combining blex map creation with this pass, cannot stop early - break + # {{{ Create blex dim names representing parallel axes - # Get dim names representing local/group axes for this kernel, - # and get the dictionary that will be used later to create a - # constraint requiring {par inames == par axes} in sched + # Create blex dim names representing lid/gid axes, and create the dicts + # that will be used later to create map constraints that match each + # parallel iname to the corresponding blex dim name in schedules, + # i.e., i = lid0, j = lid1, etc. 
lid_lex_dim_names = set() gid_lex_dim_names = set() par_iname_constraint_dicts = [] for iname in knl.all_inames(): ltag = knl.iname_tags_of_type(iname, LocalIndexTag) if ltag: - # assert len(ltag) == 1 # (should always be true) + assert len(ltag) == 1 # (should always be true) ltag_var = LTAG_VAR_NAMES[ltag.pop().axis] lid_lex_dim_names.add(ltag_var) - # Represent constraint 'iname = ltag_var' in par_iname_constraint_dicts: par_iname_constraint_dicts.append({1: 0, iname: 1, ltag_var: -1}) - continue + + continue # shouldn't be any GroupIndexTags + gtag = knl.iname_tags_of_type(iname, GroupIndexTag) if gtag: - # assert len(gtag) == 1 # (should always be true) + assert len(gtag) == 1 # (should always be true) gtag_var = GTAG_VAR_NAMES[gtag.pop().axis] gid_lex_dim_names.add(gtag_var) - # Represent constraint 'iname = gtag_var' in par_iname_constraint_dicts: par_iname_constraint_dicts.append({1: 0, iname: 1, gtag_var: -1}) - continue + + # Sort for consistent dimension ordering lid_lex_dim_names = sorted(lid_lex_dim_names) gid_lex_dim_names = sorted(gid_lex_dim_names) - # {{{ Create blex ordering (may later be combined with pass above) - - # {{{ Determine which loops contain barriers - - loops_with_barriers = {"local": set(), "global": set()} - current_inames = set() - - for lin_item in lin_items: - if isinstance(lin_item, EnterLoop): - current_inames.add(lin_item.iname) - elif isinstance(lin_item, LeaveLoop): - current_inames.remove(lin_item.iname) - elif isinstance(lin_item, Barrier): - loops_with_barriers[lin_item.synchronization_kind] |= current_inames - # At this point we could technically skip ahead to next enterloop - # }}} + # {{{ Create blex ordering (may later be combined with pass above) + # {{{ Get upper and lower bound for each loop that contains a barrier # (Could try to combine this with pass below but would make things messy) From ae657dbaa5eb423ff81e3ac7be06925d62afecb3 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Fri, 26 Mar 2021 16:46:30 -0500 
Subject: [PATCH 216/460] remove func get_insn_id_from_linearization_item() (no longer used) --- loopy/schedule/checker/utils.py | 8 -------- 1 file changed, 8 deletions(-) diff --git a/loopy/schedule/checker/utils.py b/loopy/schedule/checker/utils.py index 4ae2fbf64..9382d070a 100644 --- a/loopy/schedule/checker/utils.py +++ b/loopy/schedule/checker/utils.py @@ -253,14 +253,6 @@ def partition_inames_by_concurrency(knl): return conc_inames, all_inames-conc_inames -def get_insn_id_from_linearization_item(linearization_item): - from loopy.schedule import Barrier - if isinstance(linearization_item, Barrier): - return linearization_item.originating_insn_id - else: - return linearization_item.insn_id - - def get_EnterLoop_inames(linearization_items): from loopy.schedule import EnterLoop From 84ea6e0d9fa063509a772f08063c6f56a6fb949b Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Fri, 26 Mar 2021 21:56:29 -0500 Subject: [PATCH 217/460] significant code cleanup and organization --- loopy/schedule/checker/schedule.py | 350 ++++++++++++++++++----------- 1 file changed, 219 insertions(+), 131 deletions(-) diff --git a/loopy/schedule/checker/schedule.py b/loopy/schedule/checker/schedule.py index ac2429bb1..714044a78 100644 --- a/loopy/schedule/checker/schedule.py +++ b/loopy/schedule/checker/schedule.py @@ -224,18 +224,20 @@ def generate_pairwise_schedules( all_insn_ids = set().union(*insn_id_pairs) - # First, use one pass through lin_items to generate an *intra-work-item* + # {{{ Intra-thread lex order creation + + # First, use one pass through lin_items to generate an *intra-thread* # lexicographic ordering describing the relative order of all statements # represented by all_insn_ids # For each statement, map the insn_id to a tuple representing points - # in the intra-group lexicographic ordering containing items of :class:`int` or + # in the intra-thread lexicographic ordering containing items of :class:`int` or # :class:`str` :mod:`loopy` inames - stmt_inst_to_lex = {} + 
stmt_inst_to_lex_intra_thread = {} # Keep track of the next tuple of points in our lexicographic # ordering, initially this as a 1-d point with value 0 - next_insn_lex_tuple = [0] + next_lex_tuple = [0] # While we're passing through, determine which loops contain barriers, # this information will be used later when creating *intra-group* and @@ -251,17 +253,17 @@ def generate_pairwise_schedules( if iname in loops_to_ignore: continue - # Increment next_insn_lex_tuple[-1] for statements in the section - # of code after this EnterLoop. + # Increment next_lex_tuple[-1] for statements in the section + # of code between this EnterLoop and the matching LeaveLoop. # (not technically necessary if no statement was added in the # previous section; gratuitous incrementing is counteracted # in the simplification step below) - next_insn_lex_tuple[-1] += 1 + next_lex_tuple[-1] += 1 - # Upon entering a loop, add one lex dimension for the loop variable, + # Upon entering a loop, add one lex dimension for the loop iteration, # add second lex dim to enumerate sections of code within new loop - next_insn_lex_tuple.append(iname) - next_insn_lex_tuple.append(0) + next_lex_tuple.append(iname) + next_lex_tuple.append(0) elif isinstance(lin_item, LeaveLoop): iname = lin_item.iname @@ -270,30 +272,28 @@ def generate_pairwise_schedules( if iname in loops_to_ignore: continue - # Upon leaving a loop, - # pop lex dimension for enumerating code sections within this loop, and - # pop lex dimension for the loop variable, and - # increment lex dim val enumerating items in current section of code - next_insn_lex_tuple.pop() - next_insn_lex_tuple.pop() + # Upon leaving a loop: + # - Pop lex dim for enumerating code sections within this loop + # - Pop lex dim for the loop iteration + # - Increment lex dim val enumerating items in current section of code + next_lex_tuple.pop() + next_lex_tuple.pop() + next_lex_tuple[-1] += 1 - # Increment next_insn_lex_tuple[-1] for statements in the section - # of 
code after this LeaveLoop. # (not technically necessary if no statement was added in the # previous section; gratuitous incrementing is counteracted # in the simplification step below) - next_insn_lex_tuple[-1] += 1 elif isinstance(lin_item, RunInstruction): lp_insn_id = lin_item.insn_id # Only process listed insns, otherwise ignore if lp_insn_id in all_insn_ids: - # Add item to stmt_inst_to_lex - stmt_inst_to_lex[lp_insn_id] = tuple(next_insn_lex_tuple) + # Add item to stmt_inst_to_lex_intra_thread + stmt_inst_to_lex_intra_thread[lp_insn_id] = tuple(next_lex_tuple) # Increment lex dim val enumerating items in current section of code - next_insn_lex_tuple[-1] += 1 + next_lex_tuple[-1] += 1 elif isinstance(lin_item, Barrier): lp_insn_id = lin_item.originating_insn_id @@ -311,11 +311,11 @@ def generate_pairwise_schedules( # If barrier was identified in listed insns, process it if lp_insn_id in all_insn_ids: - # Add item to stmt_inst_to_lex - stmt_inst_to_lex[lp_insn_id] = tuple(next_insn_lex_tuple) + # Add item to stmt_inst_to_lex_intra_thread + stmt_inst_to_lex_intra_thread[lp_insn_id] = tuple(next_lex_tuple) # Increment lex dim val enumerating items in current section of code - next_insn_lex_tuple[-1] += 1 + next_lex_tuple[-1] += 1 else: from loopy.schedule import (CallKernel, ReturnFromKernel) @@ -324,12 +324,14 @@ def generate_pairwise_schedules( lin_item, (CallKernel, ReturnFromKernel)) pass - # {{{ Create blex dim names representing parallel axes + # }}} + + # {{{ Create lex dim names representing parallel axes - # Create blex dim names representing lid/gid axes, and create the dicts - # that will be used later to create map constraints that match each - # parallel iname to the corresponding blex dim name in schedules, - # i.e., i = lid0, j = lid1, etc. + # Create lex dim names representing lid/gid axes. 
+ # At the same time, create the dicts that will be used later to create map + # constraints that match each parallel iname to the corresponding lex dim + # name in schedules, i.e., i = lid0, j = lid1, etc. lid_lex_dim_names = set() gid_lex_dim_names = set() par_iname_constraint_dicts = [] @@ -341,7 +343,7 @@ def generate_pairwise_schedules( lid_lex_dim_names.add(ltag_var) par_iname_constraint_dicts.append({1: 0, iname: 1, ltag_var: -1}) - continue # shouldn't be any GroupIndexTags + continue # Shouldn't be any GroupIndexTags gtag = knl.iname_tags_of_type(iname, GroupIndexTag) if gtag: @@ -356,71 +358,119 @@ def generate_pairwise_schedules( # }}} - # {{{ Create blex ordering (may later be combined with pass above) + # {{{ Intra-group and global blex ("barrier-lex") order creation + + # (may be combined with pass above in future) + + # In blex space, we order barrier-delimited sections of code. + # Each statement instance within a single barrier-delimited section will + # map to the same blex point. The resulting statement instance ordering + # will map each statement to all statements that occur in a later + # barrier-delimited section. + + # To achieve this, we will first create a map from statement instances to + # lexicographic space almost as before, though we will not increment the + # fastest-updating lex dim with each statement, and we will increment it + # with each barrier encountered. To denote these differences, we refer to + # this space as 'blex' space. + # The resulting pairwise schedule, if composed with a map defining a + # standard lexicographic ordering (SIO), would include a number of unwanted + # 'before->after' pairs, so before creating the SIO, we will subtract the + # unwanted pairs from the standard lex order map, yielding the 'blex' order + # map. 
# {{{ Get upper and lower bound for each loop that contains a barrier - # (Could try to combine this with pass below but would make things messy) iname_bounds_pwaff = {} for iname in loops_with_barriers["local"] | loops_with_barriers["global"]: - # Get first and last vals for this iname bounds = knl.get_iname_bounds(iname) iname_bounds_pwaff[iname] = ( bounds.lower_bound_pw_aff, bounds.upper_bound_pw_aff) # }}} - conc_lex_dim_names = lid_lex_dim_names + gid_lex_dim_names + all_par_lex_dim_names = lid_lex_dim_names + gid_lex_dim_names - def _collect_blex_ordering_info(sync_kind): + # {{{ _gather_blex_ordering_info() - # {{{ Construct blueprint for creating blex space and orderings + def _gather_blex_ordering_info(sync_kind): + # For the given sync_kind ("local" or "global"), create a mapping from + # statement instances to blex space (dict), as well as a mapping + # defining the blex ordering (isl map from blex space -> blex space) - stmt_inst_to_blex = {} # map stmt instances to blex space - iname_to_blex_dim = {} # map from inames to corresponding blex space dim - blex_exclusion_info = {} # info for creating maps to exclude from blex order - blex_map_params = set() # params needed in blex map - n_seq_blex_dims = 1 # num dims representing sequential order in blex space - next_blex_pt = [0] # next tuple of points in blex order + # Note that, unlike in the intra-thread case, there will be a single + # blex ordering map defining the blex ordering for all statement pairs, + # rather than separate (smaller) lex ordering maps for each pair + + # {{{ First, create map from stmt instances to blex space. 
+ + # At the same time, gather information necessary to create the + # blex ordering map, i.e., for each loop, gather the 6 lex order tuples + # defined above in SpecialLexPointWRTLoop that will be required to + # create sub-maps which will be *excluded* (subtracted) from a standard + # lexicographic ordering in order to create the blex ordering + + stmt_inst_to_blex = {} # Map stmt instances to blex space + iname_to_blex_dim = {} # Map from inames to corresponding blex space dim + blex_exclusion_info = {} # Info for creating maps to exclude from blex order + blex_order_map_params = set() # Params needed in blex order map + n_seq_blex_dims = 1 # Num dims representing sequential order in blex space + next_blex_tuple = [0] # Next tuple of points in blex order for lin_item in lin_items: if isinstance(lin_item, EnterLoop): enter_iname = lin_item.iname if enter_iname in loops_with_barriers[sync_kind]: - # update next blex pt - pre_loop_blex_pt = next_blex_pt[:] - next_blex_pt[-1] += 1 - next_blex_pt.append(enter_iname) - next_blex_pt.append(0) + pre_loop_blex_pt = next_blex_tuple[:] + + # Increment next_blex_tuple[-1] for statements in the section + # of code between this EnterLoop and the matching LeaveLoop. 
+ next_blex_tuple[-1] += 1 + + # Upon entering a loop, add one blex dimension for the loop + # iteration, add second blex dim to enumerate sections of + # code within new loop + next_blex_tuple.append(enter_iname) + next_blex_tuple.append(0) - # store tuples that will be used to create pairs - # that will later be subtracted from happens-before map + # Store 3 tuples that will be used later to create pairs + # that will later be subtracted from the blex order map lbound = iname_bounds_pwaff[enter_iname][0] - first_iter_blex_pt = next_blex_pt[:] + first_iter_blex_pt = next_blex_tuple[:] first_iter_blex_pt[-2] = lbound blex_exclusion_info[enter_iname] = { - slex.PRE: tuple(pre_loop_blex_pt), # make sure to copy - slex.TOP: tuple(next_blex_pt), # make sure to copy - slex.FIRST: tuple(first_iter_blex_pt), # make sure to copy + slex.PRE: tuple(pre_loop_blex_pt), + slex.TOP: tuple(next_blex_tuple), + slex.FIRST: tuple(first_iter_blex_pt), } - blex_map_params |= set(lbound.get_var_names(dt.param)) + # (make sure ^these are copies) + + # Store any new params found + blex_order_map_params |= set(lbound.get_var_names(dt.param)) elif isinstance(lin_item, LeaveLoop): leave_iname = lin_item.iname if leave_iname in loops_with_barriers[sync_kind]: - # update max blex dims - n_seq_blex_dims = max(n_seq_blex_dims, len(next_blex_pt)) - iname_to_blex_dim[leave_iname] = len(next_blex_pt)-2 + # Update max blex dims + n_seq_blex_dims = max(n_seq_blex_dims, len(next_blex_tuple)) - # update next blex pt - pre_end_loop_blex_pt = next_blex_pt[:] - next_blex_pt.pop() - next_blex_pt.pop() - next_blex_pt[-1] += 1 + # Record the blex dim for this loop iname + iname_to_blex_dim[leave_iname] = len(next_blex_tuple)-2 - # store tuples that will be used to create pairs - # that will later be subtracted from happens-before map + # update next blex pt + pre_end_loop_blex_pt = next_blex_tuple[:] + + # Upon leaving a loop: + # - Pop lex dim for enumerating code sections within this loop + # - Pop lex 
dim for the loop iteration + # - Increment lex dim val enumerating items in current section + next_blex_tuple.pop() + next_blex_tuple.pop() + next_blex_tuple[-1] += 1 + + # Store 3 tuples that will be used later to create pairs + # that will later be subtracted from the blex order map ubound = iname_bounds_pwaff[leave_iname][1] last_iter_blex_pt = pre_end_loop_blex_pt[:] last_iter_blex_pt[-2] = ubound @@ -429,19 +479,22 @@ def _collect_blex_ordering_info(sync_kind): blex_exclusion_info[leave_iname][slex.LAST] = tuple( last_iter_blex_pt) blex_exclusion_info[leave_iname][slex.POST] = tuple( - next_blex_pt) + next_blex_tuple) # (make sure ^these are copies) - blex_map_params |= set(ubound.get_var_names(dt.param)) + + # Store any new params found + blex_order_map_params |= set(ubound.get_var_names(dt.param)) elif isinstance(lin_item, RunInstruction): - # Add item to stmt_inst_to_blex - stmt_inst_to_blex[lin_item.insn_id] = tuple(next_blex_pt) - # Don't increment blex dim val + # Add stmt->blex pair to stmt_inst_to_blex + stmt_inst_to_blex[lin_item.insn_id] = tuple(next_blex_tuple) + + # (Don't increment blex dim val) elif isinstance(lin_item, Barrier): # Increment blex dim val if the sync scope matches if lin_item.synchronization_kind == sync_kind: - next_blex_pt[-1] += 1 + next_blex_tuple[-1] += 1 else: from loopy.schedule import (CallKernel, ReturnFromKernel) @@ -450,7 +503,7 @@ def _collect_blex_ordering_info(sync_kind): lin_item, (CallKernel, ReturnFromKernel)) pass - blex_map_params = sorted(blex_map_params) + blex_order_map_params = sorted(blex_order_map_params) # At this point, some blex tuples may have more dimensions than others; # the missing dims are the fastest-updating dims, and their values should @@ -460,40 +513,50 @@ def _collect_blex_ordering_info(sync_kind): # }}} + # {{{ Create the blex order map + + # {{{ Create the initial (pre-subtraction) blex order map + # Create names for the blex dimensions for sequential loops seq_blex_dim_names = [ 
LEX_VAR_PREFIX+str(i) for i in range(n_seq_blex_dims)] seq_blex_dim_names_prime = append_marker_to_strings( seq_blex_dim_names, marker=BEFORE_MARK) + # Begin with the blex order map created as a standard lex order map blex_order_map = create_lex_order_map( dim_names=seq_blex_dim_names, in_dim_marker=BEFORE_MARK, ) - # Add lid/gid dims to lex order map + # Add LID/GID dims to blex order map blex_order_map = add_and_name_isl_dims( - blex_order_map, dt.out, conc_lex_dim_names) + blex_order_map, dt.out, all_par_lex_dim_names) blex_order_map = add_and_name_isl_dims( - blex_order_map, dt.in_, append_marker_to_strings(conc_lex_dim_names)) + blex_order_map, dt.in_, append_marker_to_strings(all_par_lex_dim_names)) if sync_kind == "local": - # Constrain gid vars to be equal + # For intra-group case, constrain GID 'before' to equal GID 'after' for var_name in gid_lex_dim_names: blex_order_map = add_eq_isl_constraint_from_names( blex_order_map, var_name, var_name+BEFORE_MARK) - # (if sync_kind == "global", don't need constraints on lid/gid vars) + # (if sync_kind == "global", don't need constraints on LID/GID vars) + + # }}} + + # {{{ Subtract unwanted pairs from happens-before blex map + # Create map from iname to corresponding blex dim name iname_to_blex_var = {} for iname, dim in iname_to_blex_dim.items(): iname_to_blex_var[iname] = seq_blex_dim_names[dim] iname_to_blex_var[iname+BEFORE_MARK] = seq_blex_dim_names_prime[dim] - # Add params to blex map + # Add bounds params needed in blex map blex_order_map = add_and_name_isl_dims( - blex_order_map, dt.param, blex_map_params) + blex_order_map, dt.param, blex_order_map_params) # Get a set representing blex_order_map space - n_blex_dims = n_seq_blex_dims + len(conc_lex_dim_names) + n_blex_dims = n_seq_blex_dims + len(all_par_lex_dim_names) blex_set_template = isl.align_spaces( isl.Map("[ ] -> { [ ] -> [ ] }"), blex_order_map ).move_dims( @@ -501,104 +564,129 @@ def _collect_blex_ordering_info(sync_kind): ).domain() 
blex_set_affs = isl.affs_from_space(blex_set_template.space) + # {{{ _create_excluded_map_for_iname + def _create_excluded_map_for_iname(iname, blueprint): - # Note: blueprint[slex.FIRST] and blueprint[slex.LAST] contain pwaffs + # Create the blex->blex pairs that must be subtracted from the + # initial blex order map for this particular loop: + # PRE->FIRST, BOTTOM(iname')->TOP(iname'+1), LAST->POST + + # Note: only blueprint[slex.FIRST] & blueprint[slex.LAST] contain pwaffs + + # {{{ _create_blex_set_from_tuple_pair def _create_blex_set_from_tuple_pair(before, after, wrap_cond=False): + # Given a before->after tuple pair in the blueprint, which may + # have dim vals described by strings (inames) and pwaffs, + # create an ISL set in blex space that can be converted into + # the ISL map to be subtracted - # start with a set representing blex_order_map space + # Start with a set representing blex_order_map space blex_set = blex_set_template.copy() - # add markers to inames in before tuple - # (assume strings are the inames) + # Add markers to inames in the 'before' tuple + # (all strings should be inames) before_prime = tuple( v+BEFORE_MARK if isinstance(v, str) else v for v in before) before_padded = _pad_tuple_with_zeros(before_prime, n_seq_blex_dims) after_padded = _pad_tuple_with_zeros(after, n_seq_blex_dims) - # assign vals to dims + # Assign vals in the tuple to dims in the ISL set for dim_name, dim_val in zip( seq_blex_dim_names_prime+seq_blex_dim_names, before_padded+after_padded): - # (could exploit knowledge of content types of odd/even - # tuple dims to reduce conditionals but would be ugly - # and less robust) + if isinstance(dim_val, int): - # set idx to int val + # Set idx to int val blex_set &= blex_set_affs[dim_name].eq_set( blex_set_affs[0]+dim_val) elif isinstance(dim_val, str): - # assume this is an iname, set idx to corresponding blex var + # This is an iname, set idx to corresponding blex var blex_set &= blex_set_affs[dim_name].eq_set( 
blex_set_affs[iname_to_blex_var[dim_val]]) else: + # This is a pwaff iname bound, align and intersect assert isinstance(dim_val, isl.PwAff) pwaff_aligned = isl.align_spaces(dim_val, blex_set_affs[0]) - # (doesn't matter which element of blex_set_affs we use^) + # (doesn't matter which blex_set_affs item we align to^) blex_set &= blex_set_affs[dim_name].eq_set(pwaff_aligned) if wrap_cond: - # i = i' + step - # TODO what about step sizes != 1? + # This is the BOTTOM->TOP pair, add condition i = i' + 1 blex_set &= blex_set_affs[iname_to_blex_var[iname]].eq_set( blex_set_affs[iname_to_blex_var[iname+BEFORE_MARK]] + 1) return blex_set - # enter loop case + # }}} + + # Create pairs to be subtracted (sets will be converted to map) + + # Enter loop case: PRE->FIRST full_blex_set = _create_blex_set_from_tuple_pair( blueprint[slex.PRE], blueprint[slex.FIRST]) - # wrap loop case + # Wrap loop case: BOTTOM(iname')->TOP(iname'+1) full_blex_set |= _create_blex_set_from_tuple_pair( blueprint[slex.BOTTOM], blueprint[slex.TOP], wrap_cond=True) - # leave loop case + # Leave loop case: LAST->POST full_blex_set |= _create_blex_set_from_tuple_pair( blueprint[slex.LAST], blueprint[slex.POST]) - # add cond to fix iteration value for surrounding loops (i = i') + # Add condition to fix iteration value for *surrounding* loops (j = j') for surrounding_iname in blueprint[slex.PRE][1::2]: s_blex_var = iname_to_blex_var[surrounding_iname] full_blex_set &= blex_set_affs[s_blex_var].eq_set( blex_set_affs[s_blex_var+BEFORE_MARK]) - # convert blex set back to map + # Convert blex set back to map return isl.Map.from_domain(full_blex_set).move_dims( dt.out, 0, dt.in_, n_blex_dims, n_blex_dims) - # subtract unwanted pairs from happens-before blex map + # }}} + + # Create map for each iname maps_to_subtract = [] for iname, subdict in blex_exclusion_info.items(): maps_to_subtract.append(_create_excluded_map_for_iname(iname, subdict)) if maps_to_subtract: - # get union of maps + + # Get union of maps 
map_to_subtract = maps_to_subtract[0] for other_map in maps_to_subtract[1:]: map_to_subtract |= other_map - # get some closure + # Get transitive closure of maps map_to_subtract, closure_exact = map_to_subtract.transitive_closure() - assert closure_exact # TODO warn instead + assert closure_exact # TODO warn instead? - # subtract from blex order map + # Subtract closure from blex order map blex_order_map = blex_order_map - map_to_subtract + # }}} + + # }}} + return ( stmt_inst_to_blex, # map stmt instances to blex space blex_order_map, seq_blex_dim_names, ) + # }}} + + # Get the blex schedule blueprint (dict will become a map below) and + # blex order map w.r.t. local and global barriers (stmt_inst_to_lblex, lblex_order_map, - seq_lblex_dim_names) = _collect_blex_ordering_info("local") + seq_lblex_dim_names) = _gather_blex_ordering_info("local") (stmt_inst_to_gblex, gblex_order_map, - seq_gblex_dim_names) = _collect_blex_ordering_info("global") + seq_gblex_dim_names) = _gather_blex_ordering_info("global") # }}} end blex order/map machinery - # Second, create pairwise schedules for each individual pair of insns + # {{{ Create pairwise schedules (ISL maps) for each stmt pair from loopy.schedule.checker.utils import ( sorted_union_of_names_in_isl_sets, @@ -606,6 +694,8 @@ def _create_blex_set_from_tuple_pair(before, after, wrap_cond=False): insert_and_name_isl_dims, ) + # {{{ _get_map_for_stmt() + def _get_map_for_stmt( insn_id, lex_points, int_sid, lex_dim_names): @@ -634,7 +724,7 @@ def _get_map_for_stmt( dom, dt.set, [STATEMENT_VAR_NAME], 0) # Each map will map statement instances -> lex time. - # Right now, statement instance tuples consist of single int. + # At this point, statement instance tuples consist of single int. # Add all inames from domains to each map domain tuple. 
tuple_pair = [( (int_sid, ) + tuple(dom_inames_ordered), @@ -642,9 +732,9 @@ def _get_map_for_stmt( )] # Note that lex_points may have fewer dims than the out-dim of sched_space - # if sched_space includes concurrent lid/gid dims. This is okay because + # if sched_space includes concurrent LID/GID dims. This is okay because # the following symbolic map creation step, when assigning dim values, - # zips the space dims with the lex tuple, and any leftover lid/gid dims + # zips the space dims with the lex tuple, and any leftover LID/GID dims # will not be assigned a value yet, which is what we want. # Create map @@ -660,22 +750,25 @@ def _get_map_for_stmt( return sched_map + # }}} + pairwise_schedules = {} for insn_ids in insn_id_pairs: # Determine integer IDs that will represent each statement in mapping # (dependency map creation assumes sid_before=0 and sid_after=1, unless - # before and after refer to same stmt, in which case sid_before=sid_after=0) + # before and after refer to same stmt, in which case + # sid_before=sid_after=0) int_sids = [0, 0] if insn_ids[0] == insn_ids[1] else [0, 1] - # {{{ Create SIO for intra-thread case (lid0' == lid0, etc) + # {{{ Create SIO for intra-thread case (lid0' == lid0, gid0' == gid0, etc) # Simplify tuples to the extent possible ------------------------------------ - lex_tuples = [stmt_inst_to_lex[insn_id] for insn_id in insn_ids] + lex_tuples = [stmt_inst_to_lex_intra_thread[insn_id] for insn_id in insn_ids] - # At this point, one of the lex tuples may have more dimensions than another; - # the missing dims are the fastest-updating dims, and their values should - # be zero. Add them. + # At this point, one of the lex tuples may have more dimensions than + # another; the missing dims are the fastest-updating dims, and their + # values should be zero. Add them. 
max_lex_dims = max([len(lex_tuple) for lex_tuple in lex_tuples]) lex_tuples_padded = [ _pad_tuple_with_zeros(lex_tuple, max_lex_dims) @@ -692,17 +785,12 @@ def _get_map_for_stmt( intra_thread_sched_maps = [ _get_map_for_stmt( insn_id, lex_tuple, int_sid, - seq_lex_dim_names+conc_lex_dim_names) + seq_lex_dim_names+all_par_lex_dim_names) for insn_id, lex_tuple, int_sid in zip(insn_ids, lex_tuples_simplified, int_sids) ] - # Create lex order maps and SIOs here (rather than returning schedules - # and lex maps separately and combining them outside function to get - # SIOs) to avoid passing extra info around. Don't want to, e.g., - # examine the schedule tuple in separate func to re-determine which - # parallel dims are used. (could simplify everything by always using - # all dims..., which would make maps more complex than necessary) + # Create pairwise lex order map (pairwise only in the intra-thread case) lex_order_map = create_lex_order_map( dim_names=seq_lex_dim_names, in_dim_marker=BEFORE_MARK, @@ -710,16 +798,16 @@ def _get_map_for_stmt( # Add lid/gid dims to lex order map lex_order_map = add_and_name_isl_dims( - lex_order_map, dt.out, conc_lex_dim_names) + lex_order_map, dt.out, all_par_lex_dim_names) lex_order_map = add_and_name_isl_dims( - lex_order_map, dt.in_, append_marker_to_strings(conc_lex_dim_names)) + lex_order_map, dt.in_, append_marker_to_strings(all_par_lex_dim_names)) # Constrain lid/gid vars to be equal - for var_name in conc_lex_dim_names: + for var_name in all_par_lex_dim_names: lex_order_map = add_eq_isl_constraint_from_names( lex_order_map, var_name, var_name+BEFORE_MARK) # Create statement instance ordering, - # maps each statement instance to all statement instances occuring later + # maps each statement instance to all statement instances occurring later sio_seq = get_statement_ordering_map( *intra_thread_sched_maps, # note, func accepts exactly two maps lex_order_map, @@ -730,8 +818,6 @@ def _get_map_for_stmt( # {{{ Create SIOs for 
intra-group case (gid0' == gid0, etc) - # TODO finish separating lid stuff from gid stuff - # Use *unsimplified* lex tuples with blex map, which have already been padded lblex_tuples_padded = [stmt_inst_to_lblex[insn_id] for insn_id in insn_ids] @@ -739,7 +825,7 @@ def _get_map_for_stmt( lconc_sched_maps = [ _get_map_for_stmt( insn_id, lblex_tuple, int_sid, - seq_lblex_dim_names+conc_lex_dim_names) # conc names same for all + seq_lblex_dim_names+all_par_lex_dim_names) # Par names same for all for insn_id, lblex_tuple, int_sid in zip(insn_ids, lblex_tuples_padded, int_sids) ] @@ -758,7 +844,7 @@ def _get_map_for_stmt( gconc_sched_maps = [ _get_map_for_stmt( insn_id, gblex_tuple, int_sid, - seq_gblex_dim_names+conc_lex_dim_names) # conc names same for all + seq_gblex_dim_names+all_par_lex_dim_names) # Par names same for all for insn_id, gblex_tuple, int_sid in zip(insn_ids, gblex_tuples_padded, int_sids) ] @@ -781,7 +867,9 @@ def _get_map_for_stmt( (sio_gconc, tuple(gconc_sched_maps), ), ) else: - # Store SIOs + # Store SIOs only pairwise_schedules[tuple(insn_ids)] = (sio_seq, sio_lconc, sio_gconc) + # }}} + return pairwise_schedules From 2cd77f6c3ed93bdfea7dbc29beedec3221f1cee9 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Fri, 26 Mar 2021 21:57:53 -0500 Subject: [PATCH 218/460] another doctest fix --- loopy/schedule/checker/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/loopy/schedule/checker/__init__.py b/loopy/schedule/checker/__init__.py index e99658cd1..a5858bde0 100644 --- a/loopy/schedule/checker/__init__.py +++ b/loopy/schedule/checker/__init__.py @@ -82,7 +82,7 @@ def get_schedules_for_statement_pairs( ... 
).replace("{ ", "{\n").replace(" :", "\n:")) [pi, pj, pk] -> { [_lp_linchk_stmt' = 0, j', k'] -> [_lp_linchk_stmt = 1, j, k] - : 0 <= j < pj and 0 <= k < pk and 0 <= j' < pj and 0 <= k' < pk } + : 0 <= j' < pj and 0 <= k' < pk and 0 <= j < pj and 0 <= k < pk } """ # TODO update docs and doctest now that we're returning SIOs From 3d939eff3a55aef47fc597c075d594ef24a67ddd Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Sat, 27 Mar 2021 17:04:02 -0500 Subject: [PATCH 219/460] more code cleanup and organization --- loopy/schedule/checker/schedule.py | 198 ++++++++++++++++------------- 1 file changed, 113 insertions(+), 85 deletions(-) diff --git a/loopy/schedule/checker/schedule.py b/loopy/schedule/checker/schedule.py index 714044a78..b849a2ccf 100644 --- a/loopy/schedule/checker/schedule.py +++ b/loopy/schedule/checker/schedule.py @@ -23,6 +23,9 @@ import islpy as isl dt = isl.dim_type.set + +# {{{ Constants + __doc__ = """ .. data:: LIN_CHECK_IDENTIFIER_PREFIX @@ -71,10 +74,20 @@ GTAG_VAR_NAMES.append("%sgid%d" % (LIN_CHECK_IDENTIFIER_PREFIX, par_level)) BEFORE_MARK = "'" +# }}} + + +# {{{ Helper Functions + +# {{{ _pad_tuple_with_zeros def _pad_tuple_with_zeros(tup, desired_length): return tup[:] + tuple([0]*(desired_length-len(tup))) +# }}} + + +# {{{ _simplify_lex_dims def _simplify_lex_dims(tup0, tup1): """Simplify a pair of lex tuples in order to reduce the complexity of @@ -129,6 +142,12 @@ def _simplify_lex_dims(tup0, tup1): else: return tuple(new_tup0), tuple(new_tup1) +# }}} + +# }}} + + +# {{{ class SpecialLexPointWRTLoop class SpecialLexPointWRTLoop: """Strings specifying a particular position in a lexicographic @@ -166,6 +185,10 @@ class SpecialLexPointWRTLoop: LAST = "last" POST = "post" +# }}} + + +# {{{ generate_pairwise_schedules def generate_pairwise_schedules( knl, @@ -219,6 +242,9 @@ def generate_pairwise_schedules( add_and_name_isl_dims, append_marker_to_strings, add_eq_isl_constraint_from_names, + sorted_union_of_names_in_isl_sets, + 
create_symbolic_map_from_tuples, + insert_and_name_isl_dims, ) slex = SpecialLexPointWRTLoop @@ -362,22 +388,24 @@ def generate_pairwise_schedules( # (may be combined with pass above in future) - # In blex space, we order barrier-delimited sections of code. - # Each statement instance within a single barrier-delimited section will - # map to the same blex point. The resulting statement instance ordering - # will map each statement to all statements that occur in a later - # barrier-delimited section. - - # To achieve this, we will first create a map from statement instances to - # lexicographic space almost as before, though we will not increment the - # fastest-updating lex dim with each statement, and we will increment it - # with each barrier encountered. To denote these differences, we refer to - # this space as 'blex' space. - # The resulting pairwise schedule, if composed with a map defining a - # standard lexicographic ordering (SIO), would include a number of unwanted - # 'before->after' pairs, so before creating the SIO, we will subtract the - # unwanted pairs from the standard lex order map, yielding the 'blex' order - # map. + """In blex space, we order barrier-delimited sections of code. + Each statement instance within a single barrier-delimited section will + map to the same blex point. The resulting statement instance ordering + (SIO) will map each statement to all statements that occur in a later + barrier-delimited section. + + To achieve this, we will first create a map from statement instances to + lexicographic space almost as we did above in the intra-thread case, + though we will not increment the fastest-updating lex dim with each + statement, and we will increment it with each barrier encountered. To + denote these differences, we refer to this space as 'blex' space. 
+ + The resulting pairwise schedule, if composed with a map defining a + standard lexicographic ordering to create an SIO, would include a number + of unwanted 'before->after' pairs of statement instances, so before + creating the SIO, we will subtract unwanted pairs from a standard + lex order map, yielding the 'blex' order map. + """ # {{{ Get upper and lower bound for each loop that contains a barrier @@ -389,18 +417,21 @@ def generate_pairwise_schedules( # }}} + # {{{ Create blex order maps and blex tuples defining statement ordering (x2) + all_par_lex_dim_names = lid_lex_dim_names + gid_lex_dim_names - # {{{ _gather_blex_ordering_info() + # {{{ _gather_blex_ordering_info(sync_kind): gather blex info for sync_kind def _gather_blex_ordering_info(sync_kind): - # For the given sync_kind ("local" or "global"), create a mapping from - # statement instances to blex space (dict), as well as a mapping - # defining the blex ordering (isl map from blex space -> blex space) + """For the given sync_kind ("local" or "global"), create a mapping from + statement instances to blex space (dict), as well as a mapping + defining the blex ordering (isl map from blex space -> blex space) - # Note that, unlike in the intra-thread case, there will be a single - # blex ordering map defining the blex ordering for all statement pairs, - # rather than separate (smaller) lex ordering maps for each pair + Note that, unlike in the intra-thread case, there will be a single + blex ordering map defining the blex ordering for all statement pairs, + rather than separate (smaller) lex ordering maps for each pair + """ # {{{ First, create map from stmt instances to blex space. 
@@ -458,9 +489,8 @@ def _gather_blex_ordering_info(sync_kind): # Record the blex dim for this loop iname iname_to_blex_dim[leave_iname] = len(next_blex_tuple)-2 - # update next blex pt + # Update next blex pt pre_end_loop_blex_pt = next_blex_tuple[:] - # Upon leaving a loop: # - Pop lex dim for enumerating code sections within this loop # - Pop lex dim for the loop iteration @@ -513,7 +543,7 @@ def _gather_blex_ordering_info(sync_kind): # }}} - # {{{ Create the blex order map + # {{{ Second, create the blex order map # {{{ Create the initial (pre-subtraction) blex order map @@ -567,19 +597,26 @@ def _gather_blex_ordering_info(sync_kind): # {{{ _create_excluded_map_for_iname def _create_excluded_map_for_iname(iname, blueprint): - # Create the blex->blex pairs that must be subtracted from the - # initial blex order map for this particular loop: - # PRE->FIRST, BOTTOM(iname')->TOP(iname'+1), LAST->POST + """Create the blex->blex pairs that must be subtracted from the + initial blex order map for this particular loop using the 6 blex + tuples in the blueprint: + PRE->FIRST, BOTTOM(iname')->TOP(iname'+1), LAST->POST + """ # Note: only blueprint[slex.FIRST] & blueprint[slex.LAST] contain pwaffs # {{{ _create_blex_set_from_tuple_pair def _create_blex_set_from_tuple_pair(before, after, wrap_cond=False): - # Given a before->after tuple pair in the blueprint, which may - # have dim vals described by strings (inames) and pwaffs, - # create an ISL set in blex space that can be converted into - # the ISL map to be subtracted + """Given a before->after tuple pair in the blueprint, which may + have dim vals described by ints, strings (inames), and pwaffs, + create an ISL set in blex space that can be converted into + the ISL map to be subtracted + """ + # (Vars from outside func used here: + # iname, blex_set_affs, blex_set_template, iname_to_blex_var, + # n_seq_blex_dims, seq_blex_dim_names, + # seq_blex_dim_names_prime) # Start with a set representing blex_order_map space 
blex_set = blex_set_template.copy() @@ -618,9 +655,10 @@ def _create_blex_set_from_tuple_pair(before, after, wrap_cond=False): return blex_set - # }}} + # }}} end _create_blex_set_from_tuple_pair() - # Create pairs to be subtracted (sets will be converted to map) + # Create pairs to be subtracted + # (set will be converted to map) # Enter loop case: PRE->FIRST full_blex_set = _create_blex_set_from_tuple_pair( @@ -642,9 +680,9 @@ def _create_blex_set_from_tuple_pair(before, after, wrap_cond=False): return isl.Map.from_domain(full_blex_set).move_dims( dt.out, 0, dt.in_, n_blex_dims, n_blex_dims) - # }}} + # }}} end _create_excluded_map_for_iname() - # Create map for each iname + # Create map to subtract for each iname maps_to_subtract = [] for iname, subdict in blex_exclusion_info.items(): maps_to_subtract.append(_create_excluded_map_for_iname(iname, subdict)) @@ -673,7 +711,7 @@ def _create_blex_set_from_tuple_pair(before, after, wrap_cond=False): seq_blex_dim_names, ) - # }}} + # }}} end _gather_blex_ordering_info(sync_kind) # Get the blex schedule blueprint (dict will become a map below) and # blex order map w.r.t. 
local and global barriers @@ -684,15 +722,11 @@ def _create_blex_set_from_tuple_pair(before, after, wrap_cond=False): gblex_order_map, seq_gblex_dim_names) = _gather_blex_ordering_info("global") - # }}} end blex order/map machinery + # }}} - # {{{ Create pairwise schedules (ISL maps) for each stmt pair + # }}} end intra-group and global blex order creation - from loopy.schedule.checker.utils import ( - sorted_union_of_names_in_isl_sets, - create_symbolic_map_from_tuples, - insert_and_name_isl_dims, - ) + # {{{ Create pairwise schedules (ISL maps) for each stmt pair # {{{ _get_map_for_stmt() @@ -816,45 +850,37 @@ def _get_map_for_stmt( # }}} - # {{{ Create SIOs for intra-group case (gid0' == gid0, etc) - - # Use *unsimplified* lex tuples with blex map, which have already been padded - - lblex_tuples_padded = [stmt_inst_to_lblex[insn_id] for insn_id in insn_ids] - - lconc_sched_maps = [ - _get_map_for_stmt( - insn_id, lblex_tuple, int_sid, - seq_lblex_dim_names+all_par_lex_dim_names) # Par names same for all - for insn_id, lblex_tuple, int_sid - in zip(insn_ids, lblex_tuples_padded, int_sids) - ] - - # Create statement instance ordering - sio_lconc = get_statement_ordering_map( - *lconc_sched_maps, # note, func accepts exactly two maps - lblex_order_map, - before_marker=BEFORE_MARK, - ) + # {{{ Create SIOs for intra-group case (gid0' == gid0, etc) and global case + + def _get_sched_maps_and_sio( + stmt_inst_to_blex, blex_order_map, seq_blex_dim_names): + # (Vars from outside func used here: + # insn_ids, int_sids, all_par_lex_dim_names) + + # Use *unsimplified* lex tuples w/ blex map, which are already padded + blex_tuples_padded = [stmt_inst_to_blex[insn_id] for insn_id in insn_ids] + + par_sched_maps = [ + _get_map_for_stmt( + insn_id, blex_tuple, int_sid, + seq_blex_dim_names+all_par_lex_dim_names) # all par names + for insn_id, blex_tuple, int_sid + in zip(insn_ids, blex_tuples_padded, int_sids) + ] + + # Create statement instance ordering + sio_par = 
get_statement_ordering_map( + *par_sched_maps, # note, func accepts exactly two maps + blex_order_map, + before_marker=BEFORE_MARK, + ) - # TODO use func to avoid duplicated code here: + return par_sched_maps, sio_par - gblex_tuples_padded = [stmt_inst_to_gblex[insn_id] for insn_id in insn_ids] - - gconc_sched_maps = [ - _get_map_for_stmt( - insn_id, gblex_tuple, int_sid, - seq_gblex_dim_names+all_par_lex_dim_names) # Par names same for all - for insn_id, gblex_tuple, int_sid - in zip(insn_ids, gblex_tuples_padded, int_sids) - ] - - # Create statement instance ordering - sio_gconc = get_statement_ordering_map( - *gconc_sched_maps, # note, func accepts exactly two maps - gblex_order_map, - before_marker=BEFORE_MARK, - ) + lpar_sched_maps, sio_lpar = _get_sched_maps_and_sio( + stmt_inst_to_lblex, lblex_order_map, seq_lblex_dim_names) + gpar_sched_maps, sio_gpar = _get_sched_maps_and_sio( + stmt_inst_to_gblex, gblex_order_map, seq_gblex_dim_names) # }}} @@ -863,13 +889,15 @@ def _get_map_for_stmt( # (currently helpful for testing; also could be desired by a user) pairwise_schedules[tuple(insn_ids)] = ( (sio_seq, tuple(intra_thread_sched_maps), ), - (sio_lconc, tuple(lconc_sched_maps), ), - (sio_gconc, tuple(gconc_sched_maps), ), + (sio_lpar, tuple(lpar_sched_maps), ), + (sio_gpar, tuple(gpar_sched_maps), ), ) else: # Store SIOs only - pairwise_schedules[tuple(insn_ids)] = (sio_seq, sio_lconc, sio_gconc) + pairwise_schedules[tuple(insn_ids)] = (sio_seq, sio_lpar, sio_gpar) # }}} return pairwise_schedules + +# }}} From 0f3f2799f609eb17d24694bec50499c8e3212103 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Sat, 27 Mar 2021 17:05:30 -0500 Subject: [PATCH 220/460] another doctest typo --- loopy/schedule/checker/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/loopy/schedule/checker/__init__.py b/loopy/schedule/checker/__init__.py index a5858bde0..7989df68d 100644 --- a/loopy/schedule/checker/__init__.py +++ b/loopy/schedule/checker/__init__.py 
@@ -80,7 +80,7 @@ def get_schedules_for_statement_pairs( >>> # Print map >>> print(str(sio_dict[("insn_a", "insn_b")][0] ... ).replace("{ ", "{\n").replace(" :", "\n:")) - [pi, pj, pk] -> { + [pj, pk] -> { [_lp_linchk_stmt' = 0, j', k'] -> [_lp_linchk_stmt = 1, j, k] : 0 <= j' < pj and 0 <= k' < pk and 0 <= j < pj and 0 <= k < pk } From c0a4c58e97d3e54ee094658bf8aafb12116dfacc Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Sat, 27 Mar 2021 19:57:44 -0500 Subject: [PATCH 221/460] rename get_schedules_for_statement_pairs()->get_pairwise_statement_orderings() and generate_pairwise_schedules()->get_pairwise_statement_orderings_inner(); update lots of documentation --- loopy/schedule/checker/__init__.py | 68 +++++++++++-------- .../checker/lexicographic_order_map.py | 67 +++++++++--------- loopy/schedule/checker/schedule.py | 44 +++++++----- test/test_linearization_checker.py | 20 +++--- 4 files changed, 107 insertions(+), 92 deletions(-) diff --git a/loopy/schedule/checker/__init__.py b/loopy/schedule/checker/__init__.py index 7989df68d..7644fddac 100644 --- a/loopy/schedule/checker/__init__.py +++ b/loopy/schedule/checker/__init__.py @@ -21,39 +21,46 @@ """ -# {{{ create a pairwise schedules for statement pairs +# {{{ get pairwise statement orderings -def get_schedules_for_statement_pairs( +def get_pairwise_statement_orderings( knl, - linearization_items, + lin_items, insn_id_pairs, return_schedules=False, ): r"""For each statement pair in a subset of all statement pairs found in a linearized kernel, determine the (relative) order in which the statement - instances are executed. For each pair, describe this relative ordering with - a pair of mappings from statement instances to points in a single - lexicographic ordering (a ``pairwise schedule''). When determining the - relative ordering, ignore concurrent inames. + instances are executed. 
For each pair, represent this relative ordering as + a ``statement instance ordering`` (SIO): a map from each instance of the + first statement to all instances of the second statement that occur + later. :arg knl: A preprocessed :class:`loopy.kernel.LoopKernel` containing the - linearization items that will be used to create a schedule. + linearization items that will be used to create the SIOs. - :arg linearization_items: A list of :class:`loopy.schedule.ScheduleItem` - (to be renamed to `loopy.schedule.LinearizationItem`) containing - all linearization items for which pairwise schedules will be - created. To allow usage of this routine during linearization, a - truncated (i.e. partial) linearization may be passed through this - argument. + :arg lin_items: A list of :class:`loopy.schedule.ScheduleItem` + (to be renamed to `loopy.schedule.LinearizationItem`) containing all + linearization items for which SIOs will be created. To allow usage of + this routine during linearization, a truncated (i.e. partial) + linearization may be passed through this argument. - :arg insn_id_pairs: A list containing pairs of instruction - identifiers. + :arg insn_id_pairs: A list containing pairs of instruction identifiers. + + :arg return_schedules: A :class:`bool` determining whether to include + pairwise schedules in the returned dictionary. :returns: A dictionary mapping each two-tuple of instruction identifiers - provided in `insn_id_pairs` to a corresponding two-tuple containing two - :class:`islpy.Map`\ s representing a pairwise schedule as two - mappings from statement instances to lexicographic time, one for - each of the two statements. + provided in `insn_id_pairs` to a statement instance ordering, realized + as an :class:`islpy.Map` from each instance of the first statement to + all instances of the second statement that occur later. 
+ + Optional (mainly used for testing): If `return_schedules = True`, each + dict value will be a two-tuple containing the statement instance + ordering and also a ``pairwise schedule'', a pair of mappings from + statement instances to points in a single lexicographic ordering, + realized as a two-tuple containing two :class:`islpy.Map`\ s, one for + each statement. .. doctest: @@ -70,9 +77,9 @@ def get_schedules_for_statement_pairs( >>> # Get a linearization >>> knl = lp.get_one_linearized_kernel(lp.preprocess_kernel(knl)) >>> # Get a pairwise schedule ----------------------------------------------- - >>> from loopy.schedule.checker import get_schedules_for_statement_pairs + >>> from loopy.schedule.checker import get_pairwise_statement_orderings >>> # Get two maps ---------------------------------------------------------- - >>> sio_dict = get_schedules_for_statement_pairs( + >>> sio_dict = get_pairwise_statement_orderings( ... knl, ... knl.linearization, ... [("insn_a", "insn_b")], @@ -85,7 +92,6 @@ def get_schedules_for_statement_pairs( : 0 <= j' < pj and 0 <= k' < pk and 0 <= j < pj and 0 <= k < pk } """ - # TODO update docs and doctest now that we're returning SIOs # {{{ make sure kernel has been preprocessed @@ -97,16 +103,17 @@ def get_schedules_for_statement_pairs( # }}} # {{{ Find any EnterLoop inames that are tagged as concurrent - # so that generate_pairwise_schedule() knows to ignore them + # so that get_pairwise_statement_orderings_inner() knows to ignore them # (In the future, this shouldn't be necessary because there # won't be any inames with ConcurrentTags in EnterLoop linearization items. 
- # Test which exercises this: test_linearization_checker_with_stroud_bernstein()) + # Test which exercises this in downstream PR: + # test_linearization_checker_with_stroud_bernstein()) from loopy.schedule.checker.utils import ( partition_inames_by_concurrency, get_EnterLoop_inames, ) conc_inames, _ = partition_inames_by_concurrency(knl) - enterloop_inames = get_EnterLoop_inames(linearization_items) + enterloop_inames = get_EnterLoop_inames(lin_items) conc_loop_inames = conc_inames & enterloop_inames # The only concurrent EnterLoop inames should be Vec and ILP @@ -122,11 +129,12 @@ def get_schedules_for_statement_pairs( # {{{ Create two mappings from {statement instance: lex point} - # include only instructions involved in this dependency - from loopy.schedule.checker.schedule import generate_pairwise_schedules - return generate_pairwise_schedules( + from loopy.schedule.checker.schedule import ( + get_pairwise_statement_orderings_inner + ) + return get_pairwise_statement_orderings_inner( knl, - linearization_items, + lin_items, insn_id_pairs, loops_to_ignore=conc_loop_inames, return_schedules=return_schedules, diff --git a/loopy/schedule/checker/lexicographic_order_map.py b/loopy/schedule/checker/lexicographic_order_map.py index 7927812b5..20f889975 100644 --- a/loopy/schedule/checker/lexicographic_order_map.py +++ b/loopy/schedule/checker/lexicographic_order_map.py @@ -77,33 +77,34 @@ def get_lex_order_set( in_dim_marker="'", ): """Return an :class:`islpy.Set` representing a lexicographic ordering - with the number of dimensions provided in `before_names` - (equal to the number of dimensions in `after_names`). + over a space with the number of dimensions provided in `dim_names` + (the set itself will have twice this many dimensions in order to + represent the ordering as before-after pairs of points). 
- :arg before_names: A list of :class:`str` variable names to be used + :arg dim_names: A list of :class:`str` variable names to be used to describe lexicographic space dimensions for a point in a lexicographic - ordering that occurs before another point, which will be represented using - `after_names`. (see example below) + ordering. (see example below) - :arg after_names: A list of :class:`str` variable names to be used - to describe lexicographic space dimensions for a point in a lexicographic - ordering that occurs after another point, which will be represented using - `before_names`. (see example below) - - :arg islvars: A dictionary mapping variable names in `before_names` and - `after_names` to :class:`islpy.PwAff` instances that represent each - of the variables (islvars may be produced by `islpy.make_zero_and_vars`). + :arg islvars: A dictionary mapping variable names in `dim_names` to + :class:`islpy.PwAff` instances that represent each of the variables + (islvars may be produced by `islpy.make_zero_and_vars`). The key '0' is also include and represents a :class:`islpy.PwAff` zero - constant. This dictionary defines the space to be used for the set. If no - value is passed, the dictionary will be made using `before_names` - and `after_names`. - - :returns: An :class:`islpy.Set` representing a big-endian lexicographic ordering - with the number of dimensions provided in `before_names`. The set - has one dimension for each name in *both* `before_names` and - `after_names`, and contains all points which meet a 'happens before' + constant. This dictionary defines the space to be used for the set and + must also include versions of `dim_names` with the `in_dim_marker` + appended. If no value is passed, the dictionary will be made using + `dim_names` and `dim_names` with the `in_dim_marker` appended. + + :arg in_dim_marker: A :class:`str` to be appended to dimension names to + distinguish corresponding dimensions in before-after pairs of points. 
+ (see example below) + + :returns: An :class:`islpy.Set` representing a big-endian lexicographic + ordering with the number of dimensions provided in `dim_names`. The set + has two dimensions for each name in `dim_names`, one identified by the + given name and another identified by the same name with `in_dim_marker` + appended. The set contains all points which meet a 'happens before' constraint defining the lexicographic ordering. E.g., if - `before_names = [i0', i1', i2']` and `after_names = [i0, i1, i2]`, + `dim_names = [i0, i1, i2]` and `in_dim_marker="'"`, return the set containing all points in a 3-dimensional, big-endian lexicographic ordering such that point `[i0', i1', i2']` happens before `[i0, i1, i2]`. I.e., return:: @@ -113,7 +114,6 @@ def get_lex_order_set( or (i0' = i0 and i1' = i1 and i2' < i2)} """ - # TODO update doc from loopy.schedule.checker.utils import ( append_marker_to_strings, @@ -165,30 +165,27 @@ def create_lex_order_map( :arg n_dims: An :class:`int` representing the number of dimensions in the lexicographic ordering. If not provided, `n_dims` will be - set to length of `after_names`. + set to length of `dim_names`. - :arg before_names: A list of :class:`str` variable names to be used - to describe lexicographic space dimensions for a point in a lexicographic - ordering that occurs before another point, which will be represented using - `after_names`. (see example below) + :arg dim_names: A list of :class:`str` variable names for the + lexicographic space dimensions. - :arg after_names: A list of :class:`str` variable names to be used - to describe lexicographic space dimensions for a point in a lexicographic - ordering that occurs after another point, which will be represented using - `before_names`. 
(see example below) + :arg in_dim_marker: A :class:`str` to be appended to `dim_names` to create + the names for the input dimensions of the map, thereby distinguishing + them from the corresponding output dimensions in before-after pairs of + points. (see example below) :returns: An :class:`islpy.Map` representing a lexicographic ordering as a mapping from each point in lexicographic time to every point that occurs later in lexicographic time. - E.g., if `before_names = [i0', i1', i2']` and - `after_names = [i0, i1, i2]`, return the map:: + E.g., if `dim_names = [i0, i1, i2]` and `in_dim_marker = "'"`, + return the map:: {[i0', i1', i2'] -> [i0, i1, i2] : i0' < i0 or (i0' = i0 and i1' < i1) or (i0' = i0 and i1' = i1 and i2' < i2)} """ - # TODO update doc if dim_names is None: dim_names = ["i%s" % (i) for i in range(n_dims)] diff --git a/loopy/schedule/checker/schedule.py b/loopy/schedule/checker/schedule.py index b849a2ccf..7fef51819 100644 --- a/loopy/schedule/checker/schedule.py +++ b/loopy/schedule/checker/schedule.py @@ -150,8 +150,8 @@ def _simplify_lex_dims(tup0, tup1): # {{{ class SpecialLexPointWRTLoop class SpecialLexPointWRTLoop: - """Strings specifying a particular position in a lexicographic - ordering of statements relative to a loop. + """Strings identifying a particular point or set of points in a + lexicographic ordering of statements, specified relative to a loop. .. attribute:: PRE A :class:`str` indicating the last lexicographic point that @@ -188,9 +188,9 @@ class SpecialLexPointWRTLoop: # }}} -# {{{ generate_pairwise_schedules +# {{{ get_pairwise_statement_orderings_inner -def generate_pairwise_schedules( +def get_pairwise_statement_orderings_inner( knl, lin_items, insn_id_pairs, @@ -199,21 +199,23 @@ def generate_pairwise_schedules( ): r"""For each statement pair in a subset of all statement pairs found in a linearized kernel, determine the (relative) order in which the statement - instances are executed. 
For each pair, describe this relative ordering with - a pair of mappings from statement instances to points in a single - lexicographic ordering (a ``pairwise schedule''). + instances are executed. For each pair, represent this relative ordering as + a ``statement instance ordering`` (SIO): a map from each instance of the + first statement to all instances of the second statement that occur + later. :arg knl: A preprocessed :class:`loopy.kernel.LoopKernel` containing the - linearization items that will be used to create a schedule. This + linearization items that will be used to create the SIOs. This kernel will be used to get the domains associated with the inames - used in the statements. + used in the statements, and to determine which inames have been + tagged with parallel tags. :arg lin_items: A list of :class:`loopy.schedule.ScheduleItem` (to be renamed to `loopy.schedule.LinearizationItem`) containing - all linearization items for which pairwise schedules will be + all linearization items for which SIOs will be created. To allow usage of this routine during linearization, a truncated (i.e. partial) linearization may be passed through this - argument. + argument :arg insn_id_pairs: A list containing pairs of instruction identifiers. @@ -222,14 +224,22 @@ def generate_pairwise_schedules( contain concurrent inames tagged with the ``vec`` or ``ilp`` array access tags. + :arg return_schedules: A :class:`bool` determining whether to include + pairwise schedules in the returned dictionary. + :returns: A dictionary mapping each two-tuple of instruction identifiers - provided in `insn_id_pairs` to a corresponding two-tuple containing two - :class:`islpy.Map`\ s representing a pairwise schedule as two - mappings from statement instances to lexicographic time, one for - each of the two statements. 
+ provided in `insn_id_pairs` to a statement instance ordering, realized + as an :class:`islpy.Map` from each instance of the first + statement to all instances of the second statement that occur later. + + Optional (mainly used for testing): If `return_schedules=True`, + each dict value will be a two-tuple containing the statement instance + ordering and also a ``pairwise schedule'', a pair of + mappings from statement instances to points in a single lexicographic + ordering, realized as a two-tuple containing two + :class:`islpy.Map`\ s, one for each statement. + """ - # TODO update docs now that we're returning SIOs - # TODO rename loops_to_ignore to loops_to_ignore_for_intra_thread_stuff... # TODO handle 'vec' appropriately; then remove loops_to_ignore? from loopy.schedule import (EnterLoop, LeaveLoop, Barrier, RunInstruction) diff --git a/test/test_linearization_checker.py b/test/test_linearization_checker.py index 38e9309ff..63126643a 100644 --- a/test/test_linearization_checker.py +++ b/test/test_linearization_checker.py @@ -95,7 +95,7 @@ def _isl_map_with_marked_dims(s): def test_pairwise_schedule_creation(): from loopy.schedule.checker import ( - get_schedules_for_statement_pairs, + get_pairwise_statement_orderings, ) # Example kernel @@ -144,7 +144,7 @@ def test_pairwise_schedule_creation(): ("stmt_b", "stmt_d"), ("stmt_c", "stmt_d"), ] - scheds = get_schedules_for_statement_pairs( + scheds = get_pairwise_statement_orderings( lin_knl, linearization_items, insn_id_pairs, @@ -315,7 +315,7 @@ def test_pairwise_schedule_creation(): def test_pairwise_schedule_creation_with_hw_par_tags(): from loopy.schedule.checker import ( - get_schedules_for_statement_pairs, + get_pairwise_statement_orderings, ) # Example kernel @@ -351,7 +351,7 @@ def test_pairwise_schedule_creation_with_hw_par_tags(): stmt_id_pairs = [ ("stmt_a", "stmt_b"), ] - scheds = get_schedules_for_statement_pairs( + scheds = get_pairwise_statement_orderings( lin_knl, linearization_items, 
stmt_id_pairs, @@ -495,7 +495,7 @@ def _check_sio_for_stmt_pair( def test_statement_instance_ordering(): from loopy.schedule.checker import ( - get_schedules_for_statement_pairs, + get_pairwise_statement_orderings, ) # Example kernel (add deps to fix loop order) @@ -544,7 +544,7 @@ def test_statement_instance_ordering(): ("stmt_b", "stmt_d"), ("stmt_c", "stmt_d"), ] - scheds = get_schedules_for_statement_pairs( + scheds = get_pairwise_statement_orderings( knl, linearization_items, stmt_id_pairs, @@ -622,7 +622,7 @@ def test_statement_instance_ordering(): def test_statement_instance_ordering_with_hw_par_tags(): from loopy.schedule.checker import ( - get_schedules_for_statement_pairs, + get_pairwise_statement_orderings, ) from loopy.schedule.checker.utils import ( partition_inames_by_concurrency, @@ -662,7 +662,7 @@ def test_statement_instance_ordering_with_hw_par_tags(): stmt_id_pairs = [ ("stmt_a", "stmt_b"), ] - scheds = get_schedules_for_statement_pairs( + scheds = get_pairwise_statement_orderings( lin_knl, linearization_items, stmt_id_pairs, @@ -698,7 +698,7 @@ def test_statement_instance_ordering_with_hw_par_tags(): def test_sios_and_schedules_with_barriers(): from loopy.schedule.checker import ( - get_schedules_for_statement_pairs, + get_pairwise_statement_orderings, ) assumptions = "ij_end >= ij_start + 1 and lg_end >= 1" @@ -743,7 +743,7 @@ def test_sios_and_schedules_with_barriers(): linearization_items = lin_knl.linearization insn_id_pairs = [("j1", "2"), ("1", "i0")] - scheds = get_schedules_for_statement_pairs( + scheds = get_pairwise_statement_orderings( lin_knl, linearization_items, insn_id_pairs, return_schedules=True, # include schedules for testing ) From 13d5e1260f4b120205e9493485f46f4159fcddd5 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Sun, 28 Mar 2021 18:21:51 -0500 Subject: [PATCH 222/460] update comments about vec --- loopy/schedule/checker/__init__.py | 5 +---- loopy/schedule/checker/schedule.py | 1 - 2 files changed, 1 insertion(+), 5 
deletions(-) diff --git a/loopy/schedule/checker/__init__.py b/loopy/schedule/checker/__init__.py index 7644fddac..577ab3dc4 100644 --- a/loopy/schedule/checker/__init__.py +++ b/loopy/schedule/checker/__init__.py @@ -104,10 +104,7 @@ def get_pairwise_statement_orderings( # {{{ Find any EnterLoop inames that are tagged as concurrent # so that get_pairwise_statement_orderings_inner() knows to ignore them - # (In the future, this shouldn't be necessary because there - # won't be any inames with ConcurrentTags in EnterLoop linearization items. - # Test which exercises this in downstream PR: - # test_linearization_checker_with_stroud_bernstein()) + # (In the future, this should only include inames tagged with 'vec'.) from loopy.schedule.checker.utils import ( partition_inames_by_concurrency, get_EnterLoop_inames, diff --git a/loopy/schedule/checker/schedule.py b/loopy/schedule/checker/schedule.py index 7fef51819..cf70cf3c5 100644 --- a/loopy/schedule/checker/schedule.py +++ b/loopy/schedule/checker/schedule.py @@ -240,7 +240,6 @@ def get_pairwise_statement_orderings_inner( :class:`islpy.Map`\ s, one for each statement. """ - # TODO handle 'vec' appropriately; then remove loops_to_ignore? 
from loopy.schedule import (EnterLoop, LeaveLoop, Barrier, RunInstruction) from loopy.kernel.data import (LocalIndexTag, GroupIndexTag) From 283b747d0a3842350d2bb1f1df942d09d52f65f0 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Wed, 31 Mar 2021 17:42:26 -0500 Subject: [PATCH 223/460] rename var pairwise_schedules->pairwise_sios --- loopy/schedule/checker/schedule.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/loopy/schedule/checker/schedule.py b/loopy/schedule/checker/schedule.py index cf70cf3c5..7ce61d5a3 100644 --- a/loopy/schedule/checker/schedule.py +++ b/loopy/schedule/checker/schedule.py @@ -795,7 +795,7 @@ def _get_map_for_stmt( # }}} - pairwise_schedules = {} + pairwise_sios = {} for insn_ids in insn_id_pairs: # Determine integer IDs that will represent each statement in mapping # (dependency map creation assumes sid_before=0 and sid_after=1, unless @@ -896,17 +896,17 @@ def _get_sched_maps_and_sio( if return_schedules: # Store sched maps along with SIOs # (currently helpful for testing; also could be desired by a user) - pairwise_schedules[tuple(insn_ids)] = ( + pairwise_sios[tuple(insn_ids)] = ( (sio_seq, tuple(intra_thread_sched_maps), ), (sio_lpar, tuple(lpar_sched_maps), ), (sio_gpar, tuple(gpar_sched_maps), ), ) else: # Store SIOs only - pairwise_schedules[tuple(insn_ids)] = (sio_seq, sio_lpar, sio_gpar) + pairwise_sios[tuple(insn_ids)] = (sio_seq, sio_lpar, sio_gpar) # }}} - return pairwise_schedules + return pairwise_sios # }}} From cb6ca3dbf097275d85095ab8d5fb7deb7cf6236a Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Wed, 31 Mar 2021 18:33:53 -0500 Subject: [PATCH 224/460] return a namedtuple with the sios and pwscheds; update docs accordingly --- loopy/schedule/checker/__init__.py | 26 +++++------ loopy/schedule/checker/schedule.py | 55 ++++++++++++----------- test/test_linearization_checker.py | 70 ++++++++++-------------------- 3 files changed, 67 insertions(+), 84 deletions(-) diff --git 
a/loopy/schedule/checker/__init__.py b/loopy/schedule/checker/__init__.py index 577ab3dc4..1cf8bc4e8 100644 --- a/loopy/schedule/checker/__init__.py +++ b/loopy/schedule/checker/__init__.py @@ -27,7 +27,6 @@ def get_pairwise_statement_orderings( knl, lin_items, insn_id_pairs, - return_schedules=False, ): r"""For each statement pair in a subset of all statement pairs found in a linearized kernel, determine the (relative) order in which the statement @@ -51,16 +50,18 @@ def get_pairwise_statement_orderings( pairwise schedules in the returned dictionary. :returns: A dictionary mapping each two-tuple of instruction identifiers - provided in `insn_id_pairs` to a statement instance ordering, realized - as an :class:`islpy.Map` from each instance of the first statement to - all instances of the second statement that occur later. - - Optional (mainly used for testing): If `return_schedules = True`, each - dict value will be a two-tuple containing the statement instance - ordering and also a ``pairwise schedule'', a pair of mappings from - statement instances to points in a single lexicographic ordering, - realized as a two-tuple containing two :class:`islpy.Map`\ s, one for - each statement. + provided in `insn_id_pairs` to a :class:`collections.namedtuple` + containing the intra-thread SIO (`sio_intra_thread`), intra-group SIO + (`sio_intra_group`), and global SIO (`sio_global`), each realized + as an :class:`islpy.Map` from each instance of the first + statement to all instances of the second statement that occur later, + as well as the intra-thread pairwise schedule (`pwsched_intra_thread`), + intra-group pairwise schedule (`pwsched_intra_group`), and the global + pairwise schedule (`pwsched_global`), each containing a pair of + mappings from statement instances to points in a lexicographic + ordering, one for each statement. 
Note that a pairwise schedule + alone cannot be used to reproduce the corresponding SIO without the + corresponding (unique) lexicographic order map, which is not returned. .. doctest: @@ -85,7 +86,7 @@ def get_pairwise_statement_orderings( ... [("insn_a", "insn_b")], ... ) >>> # Print map - >>> print(str(sio_dict[("insn_a", "insn_b")][0] + >>> print(str(sio_dict[("insn_a", "insn_b")].sio_intra_thread ... ).replace("{ ", "{\n").replace(" :", "\n:")) [pj, pk] -> { [_lp_linchk_stmt' = 0, j', k'] -> [_lp_linchk_stmt = 1, j, k] @@ -134,7 +135,6 @@ def get_pairwise_statement_orderings( lin_items, insn_id_pairs, loops_to_ignore=conc_loop_inames, - return_schedules=return_schedules, ) # }}} diff --git a/loopy/schedule/checker/schedule.py b/loopy/schedule/checker/schedule.py index 7ce61d5a3..0f39f727d 100644 --- a/loopy/schedule/checker/schedule.py +++ b/loopy/schedule/checker/schedule.py @@ -195,7 +195,6 @@ def get_pairwise_statement_orderings_inner( lin_items, insn_id_pairs, loops_to_ignore=set(), - return_schedules=False, ): r"""For each statement pair in a subset of all statement pairs found in a linearized kernel, determine the (relative) order in which the statement @@ -224,21 +223,19 @@ def get_pairwise_statement_orderings_inner( contain concurrent inames tagged with the ``vec`` or ``ilp`` array access tags. - :arg return_schedules: A :class:`bool` determining whether to include - pairwise schedules in the returned dictionary. - :returns: A dictionary mapping each two-tuple of instruction identifiers - provided in `insn_id_pairs` to a statement instance ordering, realized + provided in `insn_id_pairs` to a :class:`collections.namedtuple` + containing the intra-thread SIO (`sio_intra_thread`), intra-group SIO + (`sio_intra_group`), and global SIO (`sio_global`), each realized as an :class:`islpy.Map` from each instance of the first - statement to all instances of the second statement that occur later. 
- - Optional (mainly used for testing): If `return_schedules=True`, - each dict value will be a two-tuple containing the statement instance - ordering and also a ``pairwise schedule'', a pair of - mappings from statement instances to points in a single lexicographic - ordering, realized as a two-tuple containing two - :class:`islpy.Map`\ s, one for each statement. - + statement to all instances of the second statement that occur later, + as well as the intra-thread pairwise schedule (`pwsched_intra_thread`), + intra-group pairwise schedule (`pwsched_intra_group`), and the global + pairwise schedule (`pwsched_global`), each containing a pair of + mappings from statement instances to points in a lexicographic + ordering, one for each statement. Note that a pairwise schedule + alone cannot be used to reproduce the corresponding SIO without the + corresponding (unique) lexicographic order map, which is not returned. """ from loopy.schedule import (EnterLoop, LeaveLoop, Barrier, RunInstruction) @@ -796,6 +793,16 @@ def _get_map_for_stmt( # }}} pairwise_sios = {} + from collections import namedtuple + StatementOrdering = namedtuple( + 'StatementOrdering', + [ + 'sio_intra_thread', 'pwsched_intra_thread', + 'sio_intra_group', 'pwsched_intra_group', + 'sio_global', 'pwsched_global', + ]) + # ("sio" = statement instance ordering; "pwsched" = pairwise schedule) + for insn_ids in insn_id_pairs: # Determine integer IDs that will represent each statement in mapping # (dependency map creation assumes sid_before=0 and sid_after=1, unless @@ -893,17 +900,15 @@ def _get_sched_maps_and_sio( # }}} - if return_schedules: - # Store sched maps along with SIOs - # (currently helpful for testing; also could be desired by a user) - pairwise_sios[tuple(insn_ids)] = ( - (sio_seq, tuple(intra_thread_sched_maps), ), - (sio_lpar, tuple(lpar_sched_maps), ), - (sio_gpar, tuple(gpar_sched_maps), ), - ) - else: - # Store SIOs only - pairwise_sios[tuple(insn_ids)] = (sio_seq, sio_lpar, sio_gpar) + # 
Store sched maps along with SIOs + pairwise_sios[tuple(insn_ids)] = StatementOrdering( + sio_intra_thread=sio_seq, + pwsched_intra_thread=tuple(intra_thread_sched_maps), + sio_intra_group=sio_lpar, + pwsched_intra_group=tuple(lpar_sched_maps), + sio_global=sio_gpar, + pwsched_global=tuple(gpar_sched_maps), + ) # }}} diff --git a/test/test_linearization_checker.py b/test/test_linearization_checker.py index 63126643a..8ab98ed45 100644 --- a/test/test_linearization_checker.py +++ b/test/test_linearization_checker.py @@ -148,7 +148,6 @@ def test_pairwise_schedule_creation(): lin_knl, linearization_items, insn_id_pairs, - return_schedules=True, # include schedules for testing ) # Relationship between stmt_a and stmt_b --------------------------------------- @@ -355,7 +354,6 @@ def test_pairwise_schedule_creation_with_hw_par_tags(): lin_knl, linearization_items, stmt_id_pairs, - return_schedules=True, ) # Relationship between stmt_a and stmt_b --------------------------------------- @@ -448,7 +446,7 @@ def _check_lex_map(exp_lex_order_map, n_dims): def _check_sio_for_stmt_pair( stmt_id_before, stmt_id_after, - sio_dict, + all_sios, sio_seq_exp=None, sched_before_seq_exp=None, sched_after_seq_exp=None, @@ -460,33 +458,21 @@ def _check_sio_for_stmt_pair( sched_after_gconc_exp=None, ): - maps_found = sio_dict[(stmt_id_before, stmt_id_after)] - - # Check whether scheds were included in sio_dict - if isinstance(maps_found[0], tuple): - # Scheds were included - ( - sio_seq, (sched_before_seq, sched_after_seq) - ), ( - sio_lconc, (sched_before_lconc, sched_after_lconc) - ), ( - sio_gconc, (sched_before_gconc, sched_after_gconc) - ) = maps_found - map_candidates = zip([ - sio_seq_exp, sched_before_seq_exp, sched_after_seq_exp, - sio_lconc_exp, sched_before_lconc_exp, sched_after_lconc_exp, - sio_gconc_exp, sched_before_gconc_exp, sched_after_gconc_exp, - ], [ - sio_seq, sched_before_seq, sched_after_seq, - sio_lconc, sched_before_lconc, sched_after_lconc, - sio_gconc, 
sched_before_gconc, sched_after_gconc, - ]) - else: - # Scheds not included - sio_seq, sio_lconc, sio_gconc = maps_found - map_candidates = zip( - [sio_seq_exp, sio_lconc_exp, sio_gconc_exp, ], - [sio_seq, sio_lconc, sio_gconc, ]) + order_info = all_sios[(stmt_id_before, stmt_id_after)] + + # Get pairs of maps to compare for equality + map_candidates = zip([ + sio_seq_exp, sched_before_seq_exp, sched_after_seq_exp, + sio_lconc_exp, sched_before_lconc_exp, sched_after_lconc_exp, + sio_gconc_exp, sched_before_gconc_exp, sched_after_gconc_exp, + ], [ + order_info.sio_intra_thread, + order_info.pwsched_intra_thread[0], order_info.pwsched_intra_thread[1], + order_info.sio_intra_group, + order_info.pwsched_intra_group[0], order_info.pwsched_intra_group[1], + order_info.sio_global, + order_info.pwsched_global[0], order_info.pwsched_global[1], + ]) # Only compare to maps that were passed maps_to_compare = [(m1, m2) for m1, m2 in map_candidates if m1 is not None] @@ -548,7 +534,6 @@ def test_statement_instance_ordering(): knl, linearization_items, stmt_id_pairs, - return_schedules=True, ) # Relationship between stmt_a and stmt_b --------------------------------------- @@ -666,7 +651,6 @@ def test_statement_instance_ordering_with_hw_par_tags(): lin_knl, linearization_items, stmt_id_pairs, - return_schedules=True, ) # Create string for representing parallel iname condition in sio @@ -744,9 +728,7 @@ def test_sios_and_schedules_with_barriers(): insn_id_pairs = [("j1", "2"), ("1", "i0")] scheds = get_pairwise_statement_orderings( - lin_knl, linearization_items, insn_id_pairs, - return_schedules=True, # include schedules for testing - ) + lin_knl, linearization_items, insn_id_pairs) # Relationship between j1 and 2 -------------------------------------------- @@ -858,13 +840,7 @@ def test_sios_and_schedules_with_barriers(): # Check for some key example pairs in the sio_lconc map # Get maps - ( - sio_seq, (sched_map_before, sched_map_after) - ), ( - sio_lconc, (sched_before_lconc, 
sched_after_lconc) - ), ( - sio_gconc, (sched_before_gconc, sched_after_gconc) - ) = scheds[("j1", "2")] + order_info = scheds[("j1", "2")] # As long as this is not the last iteration of the i loop, then there # should be a barrier between the last instance of statement j1 @@ -887,9 +863,10 @@ def test_sios_and_schedules_with_barriers(): conc_iname_bound_str, conc_iname_bound_str_p, )) - wanted_pairs = ensure_dim_names_match_and_align(wanted_pairs, sio_lconc) + wanted_pairs = ensure_dim_names_match_and_align( + wanted_pairs, order_info.sio_intra_group) - assert wanted_pairs.is_subset(sio_lconc) + assert wanted_pairs.is_subset(order_info.sio_intra_group) # If this IS the last iteration of the i loop, then there # should NOT be a barrier between the last instance of statement j1 @@ -908,9 +885,10 @@ def test_sios_and_schedules_with_barriers(): conc_iname_bound_str, conc_iname_bound_str_p, )) - unwanted_pairs = ensure_dim_names_match_and_align(unwanted_pairs, sio_lconc) + unwanted_pairs = ensure_dim_names_match_and_align( + unwanted_pairs, order_info.sio_intra_group) - assert not unwanted_pairs.is_subset(sio_lconc) + assert not unwanted_pairs.is_subset(order_info.sio_intra_group) # Relationship between 1 and i0 -------------------------------------------- From 3843eb9d52a49bcb932074cb806601d4c8d6d6cb Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Wed, 31 Mar 2021 18:58:49 -0500 Subject: [PATCH 225/460] rename some variables to provide more precise information --- loopy/schedule/checker/schedule.py | 44 ++++---- test/test_linearization_checker.py | 168 +++++++++++++++-------------- 2 files changed, 112 insertions(+), 100 deletions(-) diff --git a/loopy/schedule/checker/schedule.py b/loopy/schedule/checker/schedule.py index 0f39f727d..77a2354ca 100644 --- a/loopy/schedule/checker/schedule.py +++ b/loopy/schedule/checker/schedule.py @@ -559,7 +559,7 @@ def _gather_blex_ordering_info(sync_kind): seq_blex_dim_names_prime = append_marker_to_strings( seq_blex_dim_names, 
marker=BEFORE_MARK) - # Begin with the blex order map created as a standard lex order map + # Begin with the blex order map created as a standard lexicographical order blex_order_map = create_lex_order_map( dim_names=seq_blex_dim_names, in_dim_marker=BEFORE_MARK, @@ -602,19 +602,20 @@ def _gather_blex_ordering_info(sync_kind): # {{{ _create_excluded_map_for_iname - def _create_excluded_map_for_iname(iname, blueprint): + def _create_excluded_map_for_iname(iname, key_lex_tuples): """Create the blex->blex pairs that must be subtracted from the initial blex order map for this particular loop using the 6 blex - tuples in the blueprint: + tuples in the key_lex_tuples: PRE->FIRST, BOTTOM(iname')->TOP(iname'+1), LAST->POST """ - # Note: only blueprint[slex.FIRST] & blueprint[slex.LAST] contain pwaffs + # Note: + # only key_lex_tuples[slex.FIRST] & key_lex_tuples[slex.LAST] are pwaffs # {{{ _create_blex_set_from_tuple_pair def _create_blex_set_from_tuple_pair(before, after, wrap_cond=False): - """Given a before->after tuple pair in the blueprint, which may + """Given a before->after tuple pair in the key_lex_tuples, which may have dim vals described by ints, strings (inames), and pwaffs, create an ISL set in blex space that can be converted into the ISL map to be subtracted @@ -668,16 +669,17 @@ def _create_blex_set_from_tuple_pair(before, after, wrap_cond=False): # Enter loop case: PRE->FIRST full_blex_set = _create_blex_set_from_tuple_pair( - blueprint[slex.PRE], blueprint[slex.FIRST]) + key_lex_tuples[slex.PRE], key_lex_tuples[slex.FIRST]) # Wrap loop case: BOTTOM(iname')->TOP(iname'+1) full_blex_set |= _create_blex_set_from_tuple_pair( - blueprint[slex.BOTTOM], blueprint[slex.TOP], wrap_cond=True) + key_lex_tuples[slex.BOTTOM], key_lex_tuples[slex.TOP], + wrap_cond=True) # Leave loop case: LAST->POST full_blex_set |= _create_blex_set_from_tuple_pair( - blueprint[slex.LAST], blueprint[slex.POST]) + key_lex_tuples[slex.LAST], key_lex_tuples[slex.POST]) # Add condition to 
fix iteration value for *surrounding* loops (j = j') - for surrounding_iname in blueprint[slex.PRE][1::2]: + for surrounding_iname in key_lex_tuples[slex.PRE][1::2]: s_blex_var = iname_to_blex_var[surrounding_iname] full_blex_set &= blex_set_affs[s_blex_var].eq_set( blex_set_affs[s_blex_var+BEFORE_MARK]) @@ -795,11 +797,11 @@ def _get_map_for_stmt( pairwise_sios = {} from collections import namedtuple StatementOrdering = namedtuple( - 'StatementOrdering', + "StatementOrdering", [ - 'sio_intra_thread', 'pwsched_intra_thread', - 'sio_intra_group', 'pwsched_intra_group', - 'sio_global', 'pwsched_global', + "sio_intra_thread", "pwsched_intra_thread", + "sio_intra_group", "pwsched_intra_group", + "sio_global", "pwsched_global", ]) # ("sio" = statement instance ordering; "pwsched" = pairwise schedule) @@ -858,7 +860,7 @@ def _get_map_for_stmt( # Create statement instance ordering, # maps each statement instance to all statement instances occurring later - sio_seq = get_statement_ordering_map( + sio_intra_thread = get_statement_ordering_map( *intra_thread_sched_maps, # note, func accepts exactly two maps lex_order_map, before_marker=BEFORE_MARK, @@ -893,21 +895,21 @@ def _get_sched_maps_and_sio( return par_sched_maps, sio_par - lpar_sched_maps, sio_lpar = _get_sched_maps_and_sio( + pwsched_intra_group, sio_intra_group = _get_sched_maps_and_sio( stmt_inst_to_lblex, lblex_order_map, seq_lblex_dim_names) - gpar_sched_maps, sio_gpar = _get_sched_maps_and_sio( + pwsched_global, sio_global = _get_sched_maps_and_sio( stmt_inst_to_gblex, gblex_order_map, seq_gblex_dim_names) # }}} # Store sched maps along with SIOs pairwise_sios[tuple(insn_ids)] = StatementOrdering( - sio_intra_thread=sio_seq, + sio_intra_thread=sio_intra_thread, pwsched_intra_thread=tuple(intra_thread_sched_maps), - sio_intra_group=sio_lpar, - pwsched_intra_group=tuple(lpar_sched_maps), - sio_global=sio_gpar, - pwsched_global=tuple(gpar_sched_maps), + sio_intra_group=sio_intra_group, + 
pwsched_intra_group=tuple(pwsched_intra_group), + sio_global=sio_global, + pwsched_global=tuple(pwsched_global), ) # }}} diff --git a/test/test_linearization_checker.py b/test/test_linearization_checker.py index 8ab98ed45..fa8dd58b4 100644 --- a/test/test_linearization_checker.py +++ b/test/test_linearization_checker.py @@ -154,7 +154,7 @@ def test_pairwise_schedule_creation(): # Create expected maps and compare - sched_before_seq_exp = isl.Map( + sched_before_intra_thread_exp = isl.Map( "[pi, pk] -> { [%s=0, i, k] -> [%s] : 0 <= i < pi and 0 <= k < pk }" % ( STATEMENT_VAR_NAME, @@ -162,7 +162,7 @@ def test_pairwise_schedule_creation(): ) ) - sched_after_seq_exp = isl.Map( + sched_after_intra_thread_exp = isl.Map( "[pi, pj] -> { [%s=1, i, j] -> [%s] : 0 <= i < pi and 0 <= j < pj }" % ( STATEMENT_VAR_NAME, @@ -172,8 +172,8 @@ def test_pairwise_schedule_creation(): _check_sio_for_stmt_pair( "stmt_a", "stmt_b", scheds, - sched_before_seq_exp=sched_before_seq_exp, - sched_after_seq_exp=sched_after_seq_exp, + sched_before_intra_thread_exp=sched_before_intra_thread_exp, + sched_after_intra_thread_exp=sched_after_intra_thread_exp, ) # ------------------------------------------------------------------------------ @@ -181,7 +181,7 @@ def test_pairwise_schedule_creation(): # Create expected maps and compare - sched_before_seq_exp = isl.Map( + sched_before_intra_thread_exp = isl.Map( "[pi, pk] -> { [%s=0, i, k] -> [%s] : 0 <= i < pi and 0 <= k < pk }" % ( STATEMENT_VAR_NAME, @@ -189,7 +189,7 @@ def test_pairwise_schedule_creation(): ) ) - sched_after_seq_exp = isl.Map( + sched_after_intra_thread_exp = isl.Map( "[pi, pj] -> { [%s=1, i, j] -> [%s] : 0 <= i < pi and 0 <= j < pj }" % ( STATEMENT_VAR_NAME, @@ -199,8 +199,8 @@ def test_pairwise_schedule_creation(): _check_sio_for_stmt_pair( "stmt_a", "stmt_c", scheds, - sched_before_seq_exp=sched_before_seq_exp, - sched_after_seq_exp=sched_after_seq_exp, + sched_before_intra_thread_exp=sched_before_intra_thread_exp, + 
sched_after_intra_thread_exp=sched_after_intra_thread_exp, ) # ------------------------------------------------------------------------------ @@ -208,7 +208,7 @@ def test_pairwise_schedule_creation(): # Create expected maps and compare - sched_before_seq_exp = isl.Map( + sched_before_intra_thread_exp = isl.Map( "[pi, pk] -> { [%s=0, i, k] -> [%s] : 0 <= i < pi and 0 <= k < pk }" % ( STATEMENT_VAR_NAME, @@ -216,7 +216,7 @@ def test_pairwise_schedule_creation(): ) ) - sched_after_seq_exp = isl.Map( + sched_after_intra_thread_exp = isl.Map( "[pt] -> { [%s=1, t] -> [%s] : 0 <= t < pt }" % ( STATEMENT_VAR_NAME, @@ -226,8 +226,8 @@ def test_pairwise_schedule_creation(): _check_sio_for_stmt_pair( "stmt_a", "stmt_d", scheds, - sched_before_seq_exp=sched_before_seq_exp, - sched_after_seq_exp=sched_after_seq_exp, + sched_before_intra_thread_exp=sched_before_intra_thread_exp, + sched_after_intra_thread_exp=sched_after_intra_thread_exp, ) # ------------------------------------------------------------------------------ @@ -235,7 +235,7 @@ def test_pairwise_schedule_creation(): # Create expected maps and compare - sched_before_seq_exp = isl.Map( + sched_before_intra_thread_exp = isl.Map( "[pi, pj] -> { [%s=0, i, j] -> [%s] : 0 <= i < pi and 0 <= j < pj }" % ( STATEMENT_VAR_NAME, @@ -243,7 +243,7 @@ def test_pairwise_schedule_creation(): ) ) - sched_after_seq_exp = isl.Map( + sched_after_intra_thread_exp = isl.Map( "[pi, pj] -> { [%s=1, i, j] -> [%s] : 0 <= i < pi and 0 <= j < pj }" % ( STATEMENT_VAR_NAME, @@ -253,8 +253,8 @@ def test_pairwise_schedule_creation(): _check_sio_for_stmt_pair( "stmt_b", "stmt_c", scheds, - sched_before_seq_exp=sched_before_seq_exp, - sched_after_seq_exp=sched_after_seq_exp, + sched_before_intra_thread_exp=sched_before_intra_thread_exp, + sched_after_intra_thread_exp=sched_after_intra_thread_exp, ) # ------------------------------------------------------------------------------ @@ -262,7 +262,7 @@ def test_pairwise_schedule_creation(): # Create 
expected maps and compare - sched_before_seq_exp = isl.Map( + sched_before_intra_thread_exp = isl.Map( "[pi, pj] -> { [%s=0, i, j] -> [%s] : 0 <= i < pi and 0 <= j < pj }" % ( STATEMENT_VAR_NAME, @@ -270,7 +270,7 @@ def test_pairwise_schedule_creation(): ) ) - sched_after_seq_exp = isl.Map( + sched_after_intra_thread_exp = isl.Map( "[pt] -> { [%s=1, t] -> [%s] : 0 <= t < pt }" % ( STATEMENT_VAR_NAME, @@ -280,8 +280,8 @@ def test_pairwise_schedule_creation(): _check_sio_for_stmt_pair( "stmt_b", "stmt_d", scheds, - sched_before_seq_exp=sched_before_seq_exp, - sched_after_seq_exp=sched_after_seq_exp, + sched_before_intra_thread_exp=sched_before_intra_thread_exp, + sched_after_intra_thread_exp=sched_after_intra_thread_exp, ) # ------------------------------------------------------------------------------ @@ -289,7 +289,7 @@ def test_pairwise_schedule_creation(): # Create expected maps and compare - sched_before_seq_exp = isl.Map( + sched_before_intra_thread_exp = isl.Map( "[pi, pj] -> { [%s=0, i, j] -> [%s] : 0 <= i < pi and 0 <= j < pj }" % ( STATEMENT_VAR_NAME, @@ -297,7 +297,7 @@ def test_pairwise_schedule_creation(): ) ) - sched_after_seq_exp = isl.Map( + sched_after_intra_thread_exp = isl.Map( "[pt] -> { [%s=1, t] -> [%s] : 0 <= t < pt }" % ( STATEMENT_VAR_NAME, @@ -307,8 +307,8 @@ def test_pairwise_schedule_creation(): _check_sio_for_stmt_pair( "stmt_c", "stmt_d", scheds, - sched_before_seq_exp=sched_before_seq_exp, - sched_after_seq_exp=sched_after_seq_exp, + sched_before_intra_thread_exp=sched_before_intra_thread_exp, + sched_after_intra_thread_exp=sched_after_intra_thread_exp, ) @@ -360,7 +360,7 @@ def test_pairwise_schedule_creation_with_hw_par_tags(): # Create expected maps and compare - sched_before_seq_exp = isl.Map( + sched_before_intra_thread_exp = isl.Map( "[pi,pj] -> {[%s=0,i,ii,j,jj] -> [%s] : 0 <= i,ii < pi and 0 <= j,jj < pj}" % ( STATEMENT_VAR_NAME, @@ -371,7 +371,7 @@ def test_pairwise_schedule_creation_with_hw_par_tags(): ) ) - 
sched_after_seq_exp = isl.Map( + sched_after_intra_thread_exp = isl.Map( "[pi,pj] -> {[%s=1,i,ii,j,jj] -> [%s] : 0 <= i,ii < pi and 0 <= j,jj < pj}" % ( STATEMENT_VAR_NAME, @@ -384,8 +384,8 @@ def test_pairwise_schedule_creation_with_hw_par_tags(): _check_sio_for_stmt_pair( "stmt_a", "stmt_b", scheds, - sched_before_seq_exp=sched_before_seq_exp, - sched_after_seq_exp=sched_after_seq_exp, + sched_before_intra_thread_exp=sched_before_intra_thread_exp, + sched_after_intra_thread_exp=sched_after_intra_thread_exp, ) # ------------------------------------------------------------------------------ @@ -447,24 +447,27 @@ def _check_sio_for_stmt_pair( stmt_id_before, stmt_id_after, all_sios, - sio_seq_exp=None, - sched_before_seq_exp=None, - sched_after_seq_exp=None, - sio_lconc_exp=None, - sched_before_lconc_exp=None, - sched_after_lconc_exp=None, - sio_gconc_exp=None, - sched_before_gconc_exp=None, - sched_after_gconc_exp=None, + sio_intra_thread_exp=None, + sched_before_intra_thread_exp=None, + sched_after_intra_thread_exp=None, + sio_intra_group_exp=None, + sched_before_intra_group_exp=None, + sched_after_intra_group_exp=None, + sio_global_exp=None, + sched_before_global_exp=None, + sched_after_global_exp=None, ): order_info = all_sios[(stmt_id_before, stmt_id_after)] # Get pairs of maps to compare for equality map_candidates = zip([ - sio_seq_exp, sched_before_seq_exp, sched_after_seq_exp, - sio_lconc_exp, sched_before_lconc_exp, sched_after_lconc_exp, - sio_gconc_exp, sched_before_gconc_exp, sched_after_gconc_exp, + sio_intra_thread_exp, + sched_before_intra_thread_exp, sched_after_intra_thread_exp, + sio_intra_group_exp, + sched_before_intra_group_exp, sched_after_intra_group_exp, + sio_global_exp, + sched_before_global_exp, sched_after_global_exp, ], [ order_info.sio_intra_thread, order_info.pwsched_intra_thread[0], order_info.pwsched_intra_thread[1], @@ -538,40 +541,43 @@ def test_statement_instance_ordering(): # Relationship between stmt_a and stmt_b 
--------------------------------------- - sio_seq_exp = _isl_map_with_marked_dims( + sio_intra_thread_exp = _isl_map_with_marked_dims( "[pi, pj, pk] -> {{ " "[{0}'=0, i', k'] -> [{0}=1, i, j] : " "0 <= i,i' < pi and 0 <= k' < pk and 0 <= j < pj and i >= i' " "}}".format(STATEMENT_VAR_NAME) ) - _check_sio_for_stmt_pair("stmt_a", "stmt_b", scheds, sio_seq_exp=sio_seq_exp) + _check_sio_for_stmt_pair( + "stmt_a", "stmt_b", scheds, sio_intra_thread_exp=sio_intra_thread_exp) # Relationship between stmt_a and stmt_c --------------------------------------- - sio_seq_exp = _isl_map_with_marked_dims( + sio_intra_thread_exp = _isl_map_with_marked_dims( "[pi, pj, pk] -> {{ " "[{0}'=0, i', k'] -> [{0}=1, i, j] : " "0 <= i,i' < pi and 0 <= k' < pk and 0 <= j < pj and i >= i' " "}}".format(STATEMENT_VAR_NAME) ) - _check_sio_for_stmt_pair("stmt_a", "stmt_c", scheds, sio_seq_exp=sio_seq_exp) + _check_sio_for_stmt_pair( + "stmt_a", "stmt_c", scheds, sio_intra_thread_exp=sio_intra_thread_exp) # Relationship between stmt_a and stmt_d --------------------------------------- - sio_seq_exp = _isl_map_with_marked_dims( + sio_intra_thread_exp = _isl_map_with_marked_dims( "[pt, pi, pk] -> {{ " "[{0}'=0, i', k'] -> [{0}=1, t] : " "0 <= i' < pi and 0 <= k' < pk and 0 <= t < pt " "}}".format(STATEMENT_VAR_NAME) ) - _check_sio_for_stmt_pair("stmt_a", "stmt_d", scheds, sio_seq_exp=sio_seq_exp) + _check_sio_for_stmt_pair( + "stmt_a", "stmt_d", scheds, sio_intra_thread_exp=sio_intra_thread_exp) # Relationship between stmt_b and stmt_c --------------------------------------- - sio_seq_exp = _isl_map_with_marked_dims( + sio_intra_thread_exp = _isl_map_with_marked_dims( "[pi, pj] -> {{ " "[{0}'=0, i', j'] -> [{0}=1, i, j] : " "0 <= i,i' < pi and 0 <= j,j' < pj and i > i'; " @@ -580,29 +586,32 @@ def test_statement_instance_ordering(): "}}".format(STATEMENT_VAR_NAME) ) - _check_sio_for_stmt_pair("stmt_b", "stmt_c", scheds, sio_seq_exp=sio_seq_exp) + _check_sio_for_stmt_pair( + "stmt_b", "stmt_c", 
scheds, sio_intra_thread_exp=sio_intra_thread_exp) # Relationship between stmt_b and stmt_d --------------------------------------- - sio_seq_exp = _isl_map_with_marked_dims( + sio_intra_thread_exp = _isl_map_with_marked_dims( "[pt, pi, pj] -> {{ " "[{0}'=0, i', j'] -> [{0}=1, t] : " "0 <= i' < pi and 0 <= j' < pj and 0 <= t < pt " "}}".format(STATEMENT_VAR_NAME) ) - _check_sio_for_stmt_pair("stmt_b", "stmt_d", scheds, sio_seq_exp=sio_seq_exp) + _check_sio_for_stmt_pair( + "stmt_b", "stmt_d", scheds, sio_intra_thread_exp=sio_intra_thread_exp) # Relationship between stmt_c and stmt_d --------------------------------------- - sio_seq_exp = _isl_map_with_marked_dims( + sio_intra_thread_exp = _isl_map_with_marked_dims( "[pt, pi, pj] -> {{ " "[{0}'=0, i', j'] -> [{0}=1, t] : " "0 <= i' < pi and 0 <= j' < pj and 0 <= t < pt " "}}".format(STATEMENT_VAR_NAME) ) - _check_sio_for_stmt_pair("stmt_c", "stmt_d", scheds, sio_seq_exp=sio_seq_exp) + _check_sio_for_stmt_pair( + "stmt_c", "stmt_d", scheds, sio_intra_thread_exp=sio_intra_thread_exp) def test_statement_instance_ordering_with_hw_par_tags(): @@ -660,7 +669,7 @@ def test_statement_instance_ordering_with_hw_par_tags(): # Relationship between stmt_a and stmt_b --------------------------------------- - sio_seq_exp = _isl_map_with_marked_dims( + sio_intra_thread_exp = _isl_map_with_marked_dims( "[pi, pj] -> {{ " "[{0}'=0, i', ii', j', jj'] -> [{0}=1, i, ii, j, jj] : " "0 <= i,ii,i',ii' < pi and 0 <= j,jj,j',jj' < pj and ii >= ii' " @@ -671,7 +680,8 @@ def test_statement_instance_ordering_with_hw_par_tags(): ) ) - _check_sio_for_stmt_pair("stmt_a", "stmt_b", scheds, sio_seq_exp=sio_seq_exp) + _check_sio_for_stmt_pair( + "stmt_a", "stmt_b", scheds, sio_intra_thread_exp=sio_intra_thread_exp) # ------------------------------------------------------------------------------ @@ -740,7 +750,7 @@ def test_sios_and_schedules_with_barriers(): conc_iname_bound_str = "0 <= l0,l1,g0 < lg_end" conc_iname_bound_str_p = "0 <= l0',l1',g0' < 
lg_end" - sched_before_lconc_exp = isl.Map( + sched_before_intra_group_exp = isl.Map( "[ij_start, ij_end, lg_end] -> {" "[%s=0, i, j, l0, l1, g0] -> [%s] : " "%s and %s}" # iname bounds @@ -755,7 +765,7 @@ def test_sios_and_schedules_with_barriers(): ) ) - sched_after_lconc_exp = isl.Map( + sched_after_intra_group_exp = isl.Map( "[lg_end] -> {[%s=1, l0, l1, g0] -> [%s] : %s}" % ( STATEMENT_VAR_NAME, @@ -767,7 +777,7 @@ def test_sios_and_schedules_with_barriers(): ) ) - sio_lconc_exp = _isl_map_with_marked_dims( + sio_intra_group_exp = _isl_map_with_marked_dims( "[ij_start, ij_end, lg_end] -> {{ " "[{0}'=0, i', j', l0', l1', g0'] -> [{0}=1, l0, l1, g0] : " "(ij_start <= j' < ij_end-1 or " # not last iteration of j @@ -784,7 +794,7 @@ def test_sios_and_schedules_with_barriers(): ) ) - sched_before_gconc_exp = isl.Map( + sched_before_global_exp = isl.Map( "[ij_start, ij_end, lg_end] -> {" "[%s=0, i, j, l0, l1, g0] -> [%s] : " "%s and %s}" # iname bounds @@ -799,7 +809,7 @@ def test_sios_and_schedules_with_barriers(): ) ) - sched_after_gconc_exp = isl.Map( + sched_after_global_exp = isl.Map( "[lg_end] -> {[%s=1, l0, l1, g0] -> [%s] : " "%s}" # iname bounds % ( @@ -812,7 +822,7 @@ def test_sios_and_schedules_with_barriers(): ) ) - sio_gconc_exp = _isl_map_with_marked_dims( + sio_global_exp = _isl_map_with_marked_dims( "[ij_start,ij_end,lg_end] -> {{ " "[{0}'=0, i', j', l0', l1', g0'] -> [{0}=1, l0, l1, g0] : " "ij_start <= i' < ij_end-1 " # not last iteration of i @@ -829,15 +839,15 @@ def test_sios_and_schedules_with_barriers(): _check_sio_for_stmt_pair( "j1", "2", scheds, - sio_lconc_exp=sio_lconc_exp, - sched_before_lconc_exp=sched_before_lconc_exp, - sched_after_lconc_exp=sched_after_lconc_exp, - sio_gconc_exp=sio_gconc_exp, - sched_before_gconc_exp=sched_before_gconc_exp, - sched_after_gconc_exp=sched_after_gconc_exp, + sio_intra_group_exp=sio_intra_group_exp, + sched_before_intra_group_exp=sched_before_intra_group_exp, + 
sched_after_intra_group_exp=sched_after_intra_group_exp, + sio_global_exp=sio_global_exp, + sched_before_global_exp=sched_before_global_exp, + sched_after_global_exp=sched_after_global_exp, ) - # Check for some key example pairs in the sio_lconc map + # Check for some key example pairs in the sio_intra_group map # Get maps order_info = scheds[("j1", "2")] @@ -894,7 +904,7 @@ def test_sios_and_schedules_with_barriers(): # Create expected maps and compare - sched_before_lconc_exp = isl.Map( + sched_before_intra_group_exp = isl.Map( "[lg_end] -> {[%s=0, l0, l1, g0] -> [%s] : " "%s}" # iname bounds % ( @@ -907,7 +917,7 @@ def test_sios_and_schedules_with_barriers(): ) ) - sched_after_lconc_exp = isl.Map( + sched_after_intra_group_exp = isl.Map( "[ij_start, ij_end, lg_end] -> {" "[%s=1, i, j, l0, l1, g0] -> [%s] : " "%s and %s}" # iname bounds @@ -922,7 +932,7 @@ def test_sios_and_schedules_with_barriers(): ) ) - sio_lconc_exp = _isl_map_with_marked_dims( + sio_intra_group_exp = _isl_map_with_marked_dims( "[ij_start, ij_end, lg_end] -> {{ " "[{0}'=0, l0', l1', g0'] -> [{0}=1, i, j, l0, l1, g0] : " "ij_start + 1 <= i < ij_end " # not first iteration of i @@ -938,7 +948,7 @@ def test_sios_and_schedules_with_barriers(): ) ) - sched_before_gconc_exp = isl.Map( + sched_before_global_exp = isl.Map( "[lg_end] -> {[%s=0, l0, l1, g0] -> [%s] : " "%s}" # iname bounds % ( @@ -951,7 +961,7 @@ def test_sios_and_schedules_with_barriers(): ) ) - sched_after_gconc_exp = isl.Map( + sched_after_global_exp = isl.Map( "[ij_start, ij_end, lg_end] -> {" "[%s=1, i, j, l0, l1, g0] -> [%s] : " "%s and %s}" # iname bounds @@ -966,7 +976,7 @@ def test_sios_and_schedules_with_barriers(): ) ) - sio_gconc_exp = _isl_map_with_marked_dims( + sio_global_exp = _isl_map_with_marked_dims( "[ij_start, ij_end, lg_end] -> {{ " "[{0}'=0, l0', l1', g0'] -> [{0}=1, i, j, l0, l1, g0] : " "ij_start + 1 <= i < ij_end " # not first iteration of i @@ -983,12 +993,12 @@ def test_sios_and_schedules_with_barriers(): 
_check_sio_for_stmt_pair( "1", "i0", scheds, - sio_lconc_exp=sio_lconc_exp, - sched_before_lconc_exp=sched_before_lconc_exp, - sched_after_lconc_exp=sched_after_lconc_exp, - sio_gconc_exp=sio_gconc_exp, - sched_before_gconc_exp=sched_before_gconc_exp, - sched_after_gconc_exp=sched_after_gconc_exp, + sio_intra_group_exp=sio_intra_group_exp, + sched_before_intra_group_exp=sched_before_intra_group_exp, + sched_after_intra_group_exp=sched_after_intra_group_exp, + sio_global_exp=sio_global_exp, + sched_before_global_exp=sched_before_global_exp, + sched_after_global_exp=sched_after_global_exp, ) # }}} From 7dd60a50e45274413d060ff9e1a6628166fef8d1 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Thu, 1 Apr 2021 00:02:03 -0500 Subject: [PATCH 226/460] fix documentation --- loopy/schedule/checker/__init__.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/loopy/schedule/checker/__init__.py b/loopy/schedule/checker/__init__.py index 1cf8bc4e8..6a2ecb9c5 100644 --- a/loopy/schedule/checker/__init__.py +++ b/loopy/schedule/checker/__init__.py @@ -46,9 +46,6 @@ def get_pairwise_statement_orderings( :arg insn_id_pairs: A list containing pairs of instruction identifiers. - :arg return_schedules: A :class:`bool` determining whether to include - pairwise schedules in the returned dictionary. 
- :returns: A dictionary mapping each two-tuple of instruction identifiers provided in `insn_id_pairs` to a :class:`collections.namedtuple` containing the intra-thread SIO (`sio_intra_thread`), intra-group SIO @@ -77,9 +74,8 @@ def get_pairwise_statement_orderings( >>> knl = lp.add_and_infer_dtypes(knl, {"a": np.float32, "b": np.float32}) >>> # Get a linearization >>> knl = lp.get_one_linearized_kernel(lp.preprocess_kernel(knl)) - >>> # Get a pairwise schedule ----------------------------------------------- + >>> # Get pairwise order info ----------------------------------------------- >>> from loopy.schedule.checker import get_pairwise_statement_orderings - >>> # Get two maps ---------------------------------------------------------- >>> sio_dict = get_pairwise_statement_orderings( ... knl, ... knl.linearization, From 2f97cc958eaf12bd640e7c704c7305ad474f6512 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Thu, 1 Apr 2021 00:02:53 -0500 Subject: [PATCH 227/460] more variable renaming to clarify output from get_pairwise_statement_orderings() --- test/test_linearization_checker.py | 78 +++++++++++++++--------------- 1 file changed, 39 insertions(+), 39 deletions(-) diff --git a/test/test_linearization_checker.py b/test/test_linearization_checker.py index fa8dd58b4..fa197bccd 100644 --- a/test/test_linearization_checker.py +++ b/test/test_linearization_checker.py @@ -144,7 +144,7 @@ def test_pairwise_schedule_creation(): ("stmt_b", "stmt_d"), ("stmt_c", "stmt_d"), ] - scheds = get_pairwise_statement_orderings( + pworders = get_pairwise_statement_orderings( lin_knl, linearization_items, insn_id_pairs, @@ -170,8 +170,8 @@ def test_pairwise_schedule_creation(): ) ) - _check_sio_for_stmt_pair( - "stmt_a", "stmt_b", scheds, + _check_orderings_for_stmt_pair( + "stmt_a", "stmt_b", pworders, sched_before_intra_thread_exp=sched_before_intra_thread_exp, sched_after_intra_thread_exp=sched_after_intra_thread_exp, ) @@ -197,8 +197,8 @@ def test_pairwise_schedule_creation(): ) ) - 
_check_sio_for_stmt_pair( - "stmt_a", "stmt_c", scheds, + _check_orderings_for_stmt_pair( + "stmt_a", "stmt_c", pworders, sched_before_intra_thread_exp=sched_before_intra_thread_exp, sched_after_intra_thread_exp=sched_after_intra_thread_exp, ) @@ -224,8 +224,8 @@ def test_pairwise_schedule_creation(): ) ) - _check_sio_for_stmt_pair( - "stmt_a", "stmt_d", scheds, + _check_orderings_for_stmt_pair( + "stmt_a", "stmt_d", pworders, sched_before_intra_thread_exp=sched_before_intra_thread_exp, sched_after_intra_thread_exp=sched_after_intra_thread_exp, ) @@ -251,8 +251,8 @@ def test_pairwise_schedule_creation(): ) ) - _check_sio_for_stmt_pair( - "stmt_b", "stmt_c", scheds, + _check_orderings_for_stmt_pair( + "stmt_b", "stmt_c", pworders, sched_before_intra_thread_exp=sched_before_intra_thread_exp, sched_after_intra_thread_exp=sched_after_intra_thread_exp, ) @@ -278,8 +278,8 @@ def test_pairwise_schedule_creation(): ) ) - _check_sio_for_stmt_pair( - "stmt_b", "stmt_d", scheds, + _check_orderings_for_stmt_pair( + "stmt_b", "stmt_d", pworders, sched_before_intra_thread_exp=sched_before_intra_thread_exp, sched_after_intra_thread_exp=sched_after_intra_thread_exp, ) @@ -305,8 +305,8 @@ def test_pairwise_schedule_creation(): ) ) - _check_sio_for_stmt_pair( - "stmt_c", "stmt_d", scheds, + _check_orderings_for_stmt_pair( + "stmt_c", "stmt_d", pworders, sched_before_intra_thread_exp=sched_before_intra_thread_exp, sched_after_intra_thread_exp=sched_after_intra_thread_exp, ) @@ -350,7 +350,7 @@ def test_pairwise_schedule_creation_with_hw_par_tags(): stmt_id_pairs = [ ("stmt_a", "stmt_b"), ] - scheds = get_pairwise_statement_orderings( + pworders = get_pairwise_statement_orderings( lin_knl, linearization_items, stmt_id_pairs, @@ -382,8 +382,8 @@ def test_pairwise_schedule_creation_with_hw_par_tags(): ) ) - _check_sio_for_stmt_pair( - "stmt_a", "stmt_b", scheds, + _check_orderings_for_stmt_pair( + "stmt_a", "stmt_b", pworders, sched_before_intra_thread_exp=sched_before_intra_thread_exp, 
sched_after_intra_thread_exp=sched_after_intra_thread_exp, ) @@ -443,7 +443,7 @@ def _check_lex_map(exp_lex_order_map, n_dims): # {{{ test statement instance ordering creation -def _check_sio_for_stmt_pair( +def _check_orderings_for_stmt_pair( stmt_id_before, stmt_id_after, all_sios, @@ -533,7 +533,7 @@ def test_statement_instance_ordering(): ("stmt_b", "stmt_d"), ("stmt_c", "stmt_d"), ] - scheds = get_pairwise_statement_orderings( + pworders = get_pairwise_statement_orderings( knl, linearization_items, stmt_id_pairs, @@ -548,8 +548,8 @@ def test_statement_instance_ordering(): "}}".format(STATEMENT_VAR_NAME) ) - _check_sio_for_stmt_pair( - "stmt_a", "stmt_b", scheds, sio_intra_thread_exp=sio_intra_thread_exp) + _check_orderings_for_stmt_pair( + "stmt_a", "stmt_b", pworders, sio_intra_thread_exp=sio_intra_thread_exp) # Relationship between stmt_a and stmt_c --------------------------------------- @@ -560,8 +560,8 @@ def test_statement_instance_ordering(): "}}".format(STATEMENT_VAR_NAME) ) - _check_sio_for_stmt_pair( - "stmt_a", "stmt_c", scheds, sio_intra_thread_exp=sio_intra_thread_exp) + _check_orderings_for_stmt_pair( + "stmt_a", "stmt_c", pworders, sio_intra_thread_exp=sio_intra_thread_exp) # Relationship between stmt_a and stmt_d --------------------------------------- @@ -572,8 +572,8 @@ def test_statement_instance_ordering(): "}}".format(STATEMENT_VAR_NAME) ) - _check_sio_for_stmt_pair( - "stmt_a", "stmt_d", scheds, sio_intra_thread_exp=sio_intra_thread_exp) + _check_orderings_for_stmt_pair( + "stmt_a", "stmt_d", pworders, sio_intra_thread_exp=sio_intra_thread_exp) # Relationship between stmt_b and stmt_c --------------------------------------- @@ -586,8 +586,8 @@ def test_statement_instance_ordering(): "}}".format(STATEMENT_VAR_NAME) ) - _check_sio_for_stmt_pair( - "stmt_b", "stmt_c", scheds, sio_intra_thread_exp=sio_intra_thread_exp) + _check_orderings_for_stmt_pair( + "stmt_b", "stmt_c", pworders, sio_intra_thread_exp=sio_intra_thread_exp) # Relationship 
between stmt_b and stmt_d --------------------------------------- @@ -598,8 +598,8 @@ def test_statement_instance_ordering(): "}}".format(STATEMENT_VAR_NAME) ) - _check_sio_for_stmt_pair( - "stmt_b", "stmt_d", scheds, sio_intra_thread_exp=sio_intra_thread_exp) + _check_orderings_for_stmt_pair( + "stmt_b", "stmt_d", pworders, sio_intra_thread_exp=sio_intra_thread_exp) # Relationship between stmt_c and stmt_d --------------------------------------- @@ -610,8 +610,8 @@ def test_statement_instance_ordering(): "}}".format(STATEMENT_VAR_NAME) ) - _check_sio_for_stmt_pair( - "stmt_c", "stmt_d", scheds, sio_intra_thread_exp=sio_intra_thread_exp) + _check_orderings_for_stmt_pair( + "stmt_c", "stmt_d", pworders, sio_intra_thread_exp=sio_intra_thread_exp) def test_statement_instance_ordering_with_hw_par_tags(): @@ -656,7 +656,7 @@ def test_statement_instance_ordering_with_hw_par_tags(): stmt_id_pairs = [ ("stmt_a", "stmt_b"), ] - scheds = get_pairwise_statement_orderings( + pworders = get_pairwise_statement_orderings( lin_knl, linearization_items, stmt_id_pairs, @@ -680,8 +680,8 @@ def test_statement_instance_ordering_with_hw_par_tags(): ) ) - _check_sio_for_stmt_pair( - "stmt_a", "stmt_b", scheds, sio_intra_thread_exp=sio_intra_thread_exp) + _check_orderings_for_stmt_pair( + "stmt_a", "stmt_b", pworders, sio_intra_thread_exp=sio_intra_thread_exp) # ------------------------------------------------------------------------------ @@ -737,7 +737,7 @@ def test_sios_and_schedules_with_barriers(): linearization_items = lin_knl.linearization insn_id_pairs = [("j1", "2"), ("1", "i0")] - scheds = get_pairwise_statement_orderings( + pworders = get_pairwise_statement_orderings( lin_knl, linearization_items, insn_id_pairs) # Relationship between j1 and 2 -------------------------------------------- @@ -837,8 +837,8 @@ def test_sios_and_schedules_with_barriers(): ) ) - _check_sio_for_stmt_pair( - "j1", "2", scheds, + _check_orderings_for_stmt_pair( + "j1", "2", pworders, 
sio_intra_group_exp=sio_intra_group_exp, sched_before_intra_group_exp=sched_before_intra_group_exp, sched_after_intra_group_exp=sched_after_intra_group_exp, @@ -850,7 +850,7 @@ def test_sios_and_schedules_with_barriers(): # Check for some key example pairs in the sio_intra_group map # Get maps - order_info = scheds[("j1", "2")] + order_info = pworders[("j1", "2")] # As long as this is not the last iteration of the i loop, then there # should be a barrier between the last instance of statement j1 @@ -991,8 +991,8 @@ def test_sios_and_schedules_with_barriers(): ) ) - _check_sio_for_stmt_pair( - "1", "i0", scheds, + _check_orderings_for_stmt_pair( + "1", "i0", pworders, sio_intra_group_exp=sio_intra_group_exp, sched_before_intra_group_exp=sched_before_intra_group_exp, sched_after_intra_group_exp=sched_after_intra_group_exp, From cd59be5b64ceeac2c658ac6d56e9e4f88aaab389 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Fri, 2 Apr 2021 03:53:15 -0500 Subject: [PATCH 228/460] add dependencies attribute to instruction (copying in changes from downstream PR to create intermediate PR) --- loopy/kernel/instruction.py | 67 +++++++++++++++++++++++++++++-------- 1 file changed, 53 insertions(+), 14 deletions(-) diff --git a/loopy/kernel/instruction.py b/loopy/kernel/instruction.py index 81b174653..ab4082c70 100644 --- a/loopy/kernel/instruction.py +++ b/loopy/kernel/instruction.py @@ -212,6 +212,7 @@ class InstructionBase(ImmutableRecord, Taggable): pymbolic_set_fields = {"predicates"} def __init__(self, id, depends_on, depends_on_is_final, + dependencies, groups, conflicts_with_groups, no_sync_with, within_inames_is_final, within_inames, @@ -241,6 +242,9 @@ def __init__(self, id, depends_on, depends_on_is_final, if depends_on is None: depends_on = frozenset() + if dependencies is None: + dependencies = {} + if groups is None: groups = frozenset() @@ -297,6 +301,7 @@ def __init__(self, id, depends_on, depends_on_is_final, id=id, depends_on=depends_on, 
depends_on_is_final=depends_on_is_final, + dependencies=dependencies, no_sync_with=no_sync_with, groups=groups, conflicts_with_groups=conflicts_with_groups, within_inames_is_final=within_inames_is_final, @@ -388,6 +393,7 @@ def get_str_options(self): if self.depends_on: result.append("dep="+":".join(self.depends_on)) + # TODO something with dependencies? if self.no_sync_with: result.append("nosync="+":".join( "%s@%s" % entry for entry in self.no_sync_with)) @@ -457,6 +463,7 @@ def __setstate__(self, val): if self.id is not None: # pylint:disable=access-member-before-definition self.id = intern(self.id) self.depends_on = intern_frozenset_of_ids(self.depends_on) + # TODO something with dependencies? self.groups = intern_frozenset_of_ids(self.groups) self.conflicts_with_groups = ( intern_frozenset_of_ids(self.conflicts_with_groups)) @@ -874,6 +881,7 @@ def __init__(self, id=None, depends_on=None, depends_on_is_final=None, + dependencies=None, groups=None, conflicts_with_groups=None, no_sync_with=None, @@ -887,6 +895,7 @@ def __init__(self, id=id, depends_on=depends_on, depends_on_is_final=depends_on_is_final, + dependencies=dependencies, groups=groups, conflicts_with_groups=conflicts_with_groups, no_sync_with=no_sync_with, @@ -1005,6 +1014,7 @@ def __init__(self, id=None, depends_on=None, depends_on_is_final=None, + dependencies=None, groups=None, conflicts_with_groups=None, no_sync_with=None, @@ -1018,6 +1028,7 @@ def __init__(self, id=id, depends_on=depends_on, depends_on_is_final=depends_on_is_final, + dependencies=dependencies, groups=groups, conflicts_with_groups=conflicts_with_groups, no_sync_with=no_sync_with, @@ -1179,13 +1190,20 @@ class CInstruction(InstructionBase): def __init__(self, iname_exprs, code, - read_variables=frozenset(), assignees=tuple(), - id=None, depends_on=None, depends_on_is_final=None, - groups=None, conflicts_with_groups=None, + read_variables=frozenset(), + assignees=tuple(), + id=None, + depends_on=None, + depends_on_is_final=None, + 
dependencies=None, + groups=None, + conflicts_with_groups=None, no_sync_with=None, - within_inames_is_final=None, within_inames=None, + within_inames_is_final=None, + within_inames=None, priority=0, - predicates=frozenset(), tags=None): + predicates=frozenset(), + tags=None): """ :arg iname_exprs: Like :attr:`iname_exprs`, but instead of tuples, simple strings pepresenting inames are also allowed. A single @@ -1200,11 +1218,13 @@ def __init__(self, id=id, depends_on=depends_on, depends_on_is_final=depends_on_is_final, + dependencies=dependencies, groups=groups, conflicts_with_groups=conflicts_with_groups, no_sync_with=no_sync_with, within_inames_is_final=within_inames_is_final, within_inames=within_inames, - priority=priority, predicates=predicates, tags=tags) + priority=priority, predicates=predicates, + tags=tags) # {{{ normalize iname_exprs @@ -1339,16 +1359,25 @@ class NoOpInstruction(_DataObliviousInstruction): ... nop """ - def __init__(self, id=None, depends_on=None, depends_on_is_final=None, - groups=None, conflicts_with_groups=None, + def __init__( + self, + id=None, + depends_on=None, + depends_on_is_final=None, + dependencies=None, + groups=None, + conflicts_with_groups=None, no_sync_with=None, - within_inames_is_final=None, within_inames=None, + within_inames_is_final=None, + within_inames=None, priority=None, - predicates=None, tags=None): + predicates=None, + tags=None): super().__init__( id=id, depends_on=depends_on, depends_on_is_final=depends_on_is_final, + dependencies=dependencies, groups=groups, conflicts_with_groups=conflicts_with_groups, no_sync_with=no_sync_with, @@ -1398,12 +1427,21 @@ class BarrierInstruction(_DataObliviousInstruction): fields = _DataObliviousInstruction.fields | {"synchronization_kind", "mem_kind"} - def __init__(self, id, depends_on=None, depends_on_is_final=None, - groups=None, conflicts_with_groups=None, + def __init__( + self, + id, + depends_on=None, + depends_on_is_final=None, + dependencies=None, + groups=None, + 
conflicts_with_groups=None, no_sync_with=None, - within_inames_is_final=None, within_inames=None, + within_inames_is_final=None, + within_inames=None, priority=None, - predicates=None, tags=None, synchronization_kind="global", + predicates=None, + tags=None, + synchronization_kind="global", mem_kind="local"): if predicates: @@ -1413,6 +1451,7 @@ def __init__(self, id, depends_on=None, depends_on_is_final=None, id=id, depends_on=depends_on, depends_on_is_final=depends_on_is_final, + dependencies=dependencies, groups=groups, conflicts_with_groups=conflicts_with_groups, no_sync_with=no_sync_with, From c03636d3c74d21a56143f98c8fbacab0c887ccb6 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Fri, 2 Apr 2021 03:54:32 -0500 Subject: [PATCH 229/460] create function to add new dependencies, add_stmt_inst_dependency() (copying in changes from downstream PR to create intermediate PR) --- loopy/transform/instruction.py | 36 ++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/loopy/transform/instruction.py b/loopy/transform/instruction.py index 055384ff1..b1e9cb7a5 100644 --- a/loopy/transform/instruction.py +++ b/loopy/transform/instruction.py @@ -117,6 +117,42 @@ def add_dep(insn): # }}} +# {{{ add_stmt_inst_dependency + +def add_stmt_inst_dependency( + kernel, stmt_id, depends_on_id, new_dependency): + """Add the statement instance dependency *new_dependency* to statement with + id *stmt_id*. 
+ """ + + if stmt_id not in kernel.id_to_insn: + raise LoopyError("no instructions found matching '%s'," + "cannot add dependency %s->%s" + % (stmt_id, depends_on_id, stmt_id)) + if depends_on_id not in kernel.id_to_insn: + raise LoopyError("no instructions found matching '%s'," + "cannot add dependency %s->%s" + % (depends_on_id, depends_on_id, stmt_id)) + + matched = [False] + + def _add_dep(stmt): + new_deps_dict = stmt.dependencies # dict mapping depends-on ids to dep maps + matched[0] = True + new_deps_dict.setdefault(depends_on_id, []).append(new_dependency) + return stmt.copy(dependencies=new_deps_dict) + + result = map_instructions(kernel, "id:%s" % (stmt_id), _add_dep) + + if not matched[0]: # Is this possible, given check above? + raise LoopyError("no instructions found matching '%s' " + "(to which dependencies would be added)" % stmt_id) + + return result + +# }}} + + # {{{ remove_instructions def remove_instructions(kernel, insn_ids): From 8a2e7ff09ff98572a7906e7168b265321a4659b4 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Fri, 2 Apr 2021 04:02:10 -0500 Subject: [PATCH 230/460] create function to ensure linearization satisfies deps, check_linearization_validity() (copying in changes from downstream PR to create intermediate PR) --- loopy/schedule/checker/__init__.py | 100 +++++++++++++++++++++++++++++ 1 file changed, 100 insertions(+) diff --git a/loopy/schedule/checker/__init__.py b/loopy/schedule/checker/__init__.py index 6a2ecb9c5..e3d4d0090 100644 --- a/loopy/schedule/checker/__init__.py +++ b/loopy/schedule/checker/__init__.py @@ -136,3 +136,103 @@ def get_pairwise_statement_orderings( # }}} # }}} + + +def check_linearization_validity( + knl, + linearization_items, + ): + # TODO document + + from loopy.schedule.checker.utils import ( + prettier_map_string, + ) + + # {{{ make sure kernel has been preprocessed + + # note: kernels must always be preprocessed before scheduling + from loopy.kernel import KernelState + assert knl.state in [ + 
KernelState.PREPROCESSED, + KernelState.LINEARIZED] + + # }}} + + # {{{ Create map from dependent instruction id pairs to dependencies + + # To minimize time complexity, all pairwise schedules will be created + # in one pass, which first requires finding all pairs of statements involved + # in deps. + # So, since we have to find these pairs anyway, collect their deps at + # the same time so we don't have to do it again later during lin checking. + + stmts_to_deps = {} + for insn_after in knl.instructions: + for before_id, dep_list in insn_after.dependencies.items(): + stmts_to_deps.setdefault( + (before_id, insn_after.id), []).extend(dep_list) + # }}} + + pworders = get_pairwise_statement_orderings( + knl, + linearization_items, + stmts_to_deps.keys(), + ) + + # For each dependency, create+test linearization containing pair of insns------ + linearization_is_valid = True + for (insn_id_before, insn_id_after), dependencies in stmts_to_deps.items(): + + # Get pairwise ordering info for stmts involved in the dependency + pworder = pworders[(insn_id_before, insn_id_after)] + + # check each dep for this statement pair + for dependency in dependencies: + + # reorder variables/params in constraint map space to match SIO so we can + # check to see whether the constraint map is a subset of the SIO + # (spaces must be aligned so that the variables in the constraint map + # correspond to the same variables in the SIO) + from loopy.schedule.checker.utils import ( + ensure_dim_names_match_and_align, + ) + + aligned_dep_map = ensure_dim_names_match_and_align( + dependency, pworder.sio_intra_thread) + + assert aligned_dep_map.space == pworder.sio_intra_thread.space + assert aligned_dep_map.space == pworder.sio_intra_group.space + assert aligned_dep_map.space == pworder.sio_global.space + assert (aligned_dep_map.get_var_dict() == + pworder.sio_intra_thread.get_var_dict()) + assert (aligned_dep_map.get_var_dict() == + pworder.sio_intra_group.get_var_dict()) + assert 
(aligned_dep_map.get_var_dict() == + pworder.sio_global.get_var_dict()) + + if not aligned_dep_map.is_subset( + pworder.sio_intra_thread | + pworder.sio_intra_group | + pworder.sio_global + ): + + linearization_is_valid = False + + print("================ constraint check failure =================") + print("Constraint map not subset of SIO") + print("Dependencies:") + print(insn_id_before+"->"+insn_id_after) + print(prettier_map_string(dependency)) + print("Statement instance ordering:") + print(prettier_map_string(pworder.sio_intra_thread)) + print("dependency.gist(pworder.sio_intra_thread):") + print(prettier_map_string( + aligned_dep_map.gist(pworder.sio_intra_thread))) + print("pworder.sio_intra_thread.gist(dependency)") + print(prettier_map_string( + pworder.sio_intra_thread.gist(aligned_dep_map))) + print("Loop priority known:") + print(knl.loop_priority) + print("===========================================================") + + return linearization_is_valid From 10f2ff57411c71ce03f925771d5b34a9bc495508 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Fri, 2 Apr 2021 04:03:45 -0500 Subject: [PATCH 231/460] make check_linearization_validity() and add_stmt_inst_dependency() global loopy funcs (copying in changes from downstream PR to create intermediate PR) --- loopy/__init__.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/loopy/__init__.py b/loopy/__init__.py index 9c4bfa6d0..8cec9c6ac 100644 --- a/loopy/__init__.py +++ b/loopy/__init__.py @@ -78,7 +78,8 @@ from loopy.transform.instruction import ( find_instructions, map_instructions, - set_instruction_priority, add_dependency, + set_instruction_priority, + add_dependency, add_stmt_inst_dependency, remove_instructions, replace_instruction_ids, tag_instructions, @@ -121,6 +122,8 @@ from loopy.preprocess import preprocess_kernel, realize_reduction from loopy.schedule import ( generate_loop_schedules, get_one_scheduled_kernel, get_one_linearized_kernel) +from loopy.schedule.checker 
import ( + check_linearization_validity) from loopy.statistics import (ToCountMap, CountGranularity, stringify_stats_mapping, Op, MemAccess, get_op_map, get_mem_access_map, get_synchronization_map, gather_access_footprints, @@ -202,7 +205,8 @@ "rename_argument", "set_temporary_scope", "find_instructions", "map_instructions", - "set_instruction_priority", "add_dependency", + "set_instruction_priority", + "add_dependency", "add_stmt_inst_dependency", "remove_instructions", "replace_instruction_ids", "tag_instructions", @@ -247,6 +251,7 @@ "preprocess_kernel", "realize_reduction", "generate_loop_schedules", "get_one_scheduled_kernel", "get_one_linearized_kernel", + "check_linearization_validity", "GeneratedProgram", "CodeGenerationResult", "PreambleInfo", "generate_code", "generate_code_v2", "generate_body", From a4deb28837a3bae78242938a7547687adec6fee2 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Fri, 2 Apr 2021 04:05:03 -0500 Subject: [PATCH 232/460] create test for adding dependencies to statements (copying in changes from downstream PR to create intermediate PR) --- test/test_linearization_checker.py | 137 ++++++++++++++++++++++++++--- 1 file changed, 127 insertions(+), 10 deletions(-) diff --git a/test/test_linearization_checker.py b/test/test_linearization_checker.py index fa197bccd..00e35ec5f 100644 --- a/test/test_linearization_checker.py +++ b/test/test_linearization_checker.py @@ -134,7 +134,7 @@ def test_pairwise_schedule_creation(): # Get a linearization proc_knl = preprocess_kernel(knl) lin_knl = get_one_linearized_kernel(proc_knl) - linearization_items = lin_knl.linearization + lin_items = lin_knl.linearization insn_id_pairs = [ ("stmt_a", "stmt_b"), @@ -146,7 +146,7 @@ def test_pairwise_schedule_creation(): ] pworders = get_pairwise_statement_orderings( lin_knl, - linearization_items, + lin_items, insn_id_pairs, ) @@ -345,14 +345,14 @@ def test_pairwise_schedule_creation_with_hw_par_tags(): # Get a linearization proc_knl = preprocess_kernel(knl) 
lin_knl = get_one_linearized_kernel(proc_knl) - linearization_items = lin_knl.linearization + lin_items = lin_knl.linearization stmt_id_pairs = [ ("stmt_a", "stmt_b"), ] pworders = get_pairwise_statement_orderings( lin_knl, - linearization_items, + lin_items, stmt_id_pairs, ) @@ -522,7 +522,7 @@ def test_statement_instance_ordering(): # Get a linearization knl = preprocess_kernel(knl) knl = get_one_linearized_kernel(knl) - linearization_items = knl.linearization + lin_items = knl.linearization # Get pairwise schedules stmt_id_pairs = [ @@ -535,7 +535,7 @@ def test_statement_instance_ordering(): ] pworders = get_pairwise_statement_orderings( knl, - linearization_items, + lin_items, stmt_id_pairs, ) @@ -650,7 +650,7 @@ def test_statement_instance_ordering_with_hw_par_tags(): # Get a linearization proc_knl = preprocess_kernel(knl) lin_knl = get_one_linearized_kernel(proc_knl) - linearization_items = lin_knl.linearization + lin_items = lin_knl.linearization # Get pairwise schedules stmt_id_pairs = [ @@ -658,7 +658,7 @@ def test_statement_instance_ordering_with_hw_par_tags(): ] pworders = get_pairwise_statement_orderings( lin_knl, - linearization_items, + lin_items, stmt_id_pairs, ) @@ -734,11 +734,11 @@ def test_sios_and_schedules_with_barriers(): # Get a linearization proc_knl = preprocess_kernel(knl) lin_knl = get_one_linearized_kernel(proc_knl) - linearization_items = lin_knl.linearization + lin_items = lin_knl.linearization insn_id_pairs = [("j1", "2"), ("1", "i0")] pworders = get_pairwise_statement_orderings( - lin_knl, linearization_items, insn_id_pairs) + lin_knl, lin_items, insn_id_pairs) # Relationship between j1 and 2 -------------------------------------------- @@ -1004,6 +1004,123 @@ def test_sios_and_schedules_with_barriers(): # }}} +def test_add_stmt_inst_dependencies(): + + lp.set_caching_enabled(False) + # TODO REMOVE THIS^ (prevents + # TypeError: unsupported type for persistent hash keying: + # ) during preprocessing + + # Make kernel and use OLD deps 
to linearize correctly for now + i_range_str = "0 <= i < pi" + i_range_str_p = "0 <= i' < pi" + assumptions_str = "pi >= 1" + knl = lp.make_kernel( + "{[i]: %s}" % (i_range_str), + """ + a[i] = 3.14 {id=stmt_a} + b[i] = a[i] {id=stmt_b, dep=stmt_a} + c[i] = b[i] {id=stmt_c, dep=stmt_b} + """, + name="example", + assumptions=assumptions_str, + lang_version=(2018, 2) + ) + knl = lp.add_and_infer_dtypes( + knl, {"a": np.float32, "b": np.float32, "c": np.float32}) + + for stmt in knl.instructions: + assert not stmt.dependencies + + # Add a dependency to stmt_b + dep_b_on_a = _isl_map_with_marked_dims( + "[pi] -> {{ [{0}'=0, i'] -> [{0}=1, i] : i > i' " + "and {1} and {2} and {3} }}".format( + STATEMENT_VAR_NAME, + i_range_str, + i_range_str_p, + assumptions_str, + )) + + knl = lp.add_stmt_inst_dependency(knl, "stmt_b", "stmt_a", dep_b_on_a) + + for stmt in knl.instructions: + if stmt.id == "stmt_b": + assert stmt.dependencies == { + "stmt_a": [dep_b_on_a, ], + } + else: + assert not stmt.dependencies + + # Add a second dependency to stmt_b + dep_b_on_a_2 = _isl_map_with_marked_dims( + "[pi] -> {{ [{0}'=0, i'] -> [{0}=1, i] : i = i' " + "and {1} and {2} and {3} }}".format( + STATEMENT_VAR_NAME, + i_range_str, + i_range_str_p, + assumptions_str, + )) + + knl = lp.add_stmt_inst_dependency(knl, "stmt_b", "stmt_a", dep_b_on_a_2) + + for stmt in knl.instructions: + if stmt.id == "stmt_b": + assert stmt.dependencies == { + "stmt_a": [dep_b_on_a, dep_b_on_a_2], + } + else: + assert not stmt.dependencies + + # Add dependencies to stmt_c + + dep_c_on_a = _isl_map_with_marked_dims( + "[pi] -> {{ [{0}'=0, i'] -> [{0}=1, i] : i >= i' " + "and {1} and {2} and {3} }}".format( + STATEMENT_VAR_NAME, + i_range_str, + i_range_str_p, + assumptions_str, + )) + dep_c_on_b = _isl_map_with_marked_dims( + "[pi] -> {{ [{0}'=0, i'] -> [{0}=1, i] : i >= i' " + "and {1} and {2} and {3} }}".format( + STATEMENT_VAR_NAME, + i_range_str, + i_range_str_p, + assumptions_str, + )) + + knl = 
lp.add_stmt_inst_dependency(knl, "stmt_c", "stmt_a", dep_c_on_a) + knl = lp.add_stmt_inst_dependency(knl, "stmt_c", "stmt_b", dep_c_on_b) + + for stmt in knl.instructions: + if stmt.id == "stmt_b": + assert stmt.dependencies == { + "stmt_a": [dep_b_on_a, dep_b_on_a_2], + } + elif stmt.id == "stmt_c": + assert stmt.dependencies == { + "stmt_a": [dep_c_on_a, ], + "stmt_b": [dep_c_on_b, ], + } + else: + assert not stmt.dependencies + + # Now make sure deps are satisfied + proc_knl = preprocess_kernel(knl) + lin_knl = get_one_linearized_kernel(proc_knl) + lin_items = lin_knl.linearization + + linearization_is_valid = lp.check_linearization_validity( + proc_knl, lin_items) + + assert linearization_is_valid + + +# TODO create more kernels with valid/invalid linearizations to test checker + + if __name__ == "__main__": if len(sys.argv) > 1: exec(sys.argv[1]) From b62e109879d0299f157869ab359d5e267569807c Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Fri, 2 Apr 2021 04:20:57 -0500 Subject: [PATCH 233/460] rename check_linearization_validity()->check_dependency_satisfaction() --- loopy/__init__.py | 4 ++-- loopy/schedule/checker/__init__.py | 2 +- test/test_linearization_checker.py | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/loopy/__init__.py b/loopy/__init__.py index 8cec9c6ac..e7a66ed29 100644 --- a/loopy/__init__.py +++ b/loopy/__init__.py @@ -123,7 +123,7 @@ from loopy.schedule import ( generate_loop_schedules, get_one_scheduled_kernel, get_one_linearized_kernel) from loopy.schedule.checker import ( - check_linearization_validity) + check_dependency_satisfaction) from loopy.statistics import (ToCountMap, CountGranularity, stringify_stats_mapping, Op, MemAccess, get_op_map, get_mem_access_map, get_synchronization_map, gather_access_footprints, @@ -251,7 +251,7 @@ "preprocess_kernel", "realize_reduction", "generate_loop_schedules", "get_one_scheduled_kernel", "get_one_linearized_kernel", - "check_linearization_validity", + 
"check_dependency_satisfaction", "GeneratedProgram", "CodeGenerationResult", "PreambleInfo", "generate_code", "generate_code_v2", "generate_body", diff --git a/loopy/schedule/checker/__init__.py b/loopy/schedule/checker/__init__.py index e3d4d0090..0fcee4831 100644 --- a/loopy/schedule/checker/__init__.py +++ b/loopy/schedule/checker/__init__.py @@ -138,7 +138,7 @@ def get_pairwise_statement_orderings( # }}} -def check_linearization_validity( +def check_dependency_satisfaction( knl, linearization_items, ): diff --git a/test/test_linearization_checker.py b/test/test_linearization_checker.py index 00e35ec5f..7a3838460 100644 --- a/test/test_linearization_checker.py +++ b/test/test_linearization_checker.py @@ -1112,7 +1112,7 @@ def test_add_stmt_inst_dependencies(): lin_knl = get_one_linearized_kernel(proc_knl) lin_items = lin_knl.linearization - linearization_is_valid = lp.check_linearization_validity( + linearization_is_valid = lp.check_dependency_satisfaction( proc_knl, lin_items) assert linearization_is_valid From 8091b636fed200ff6602fbdb78828ce8f781ccea Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Sun, 4 Apr 2021 20:37:20 -0500 Subject: [PATCH 234/460] in intra-group and global orderings, don't add loop dims to lex order if iname is in loops_to_ignore (vec/ilp) --- loopy/schedule/checker/schedule.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/loopy/schedule/checker/schedule.py b/loopy/schedule/checker/schedule.py index 77a2354ca..4a8d1a479 100644 --- a/loopy/schedule/checker/schedule.py +++ b/loopy/schedule/checker/schedule.py @@ -457,7 +457,7 @@ def _gather_blex_ordering_info(sync_kind): for lin_item in lin_items: if isinstance(lin_item, EnterLoop): enter_iname = lin_item.iname - if enter_iname in loops_with_barriers[sync_kind]: + if enter_iname in loops_with_barriers[sync_kind] - loops_to_ignore: pre_loop_blex_pt = next_blex_tuple[:] # Increment next_blex_tuple[-1] for statements in the section @@ -487,7 +487,7 @@ def 
_gather_blex_ordering_info(sync_kind): elif isinstance(lin_item, LeaveLoop): leave_iname = lin_item.iname - if leave_iname in loops_with_barriers[sync_kind]: + if leave_iname in loops_with_barriers[sync_kind] - loops_to_ignore: # Update max blex dims n_seq_blex_dims = max(n_seq_blex_dims, len(next_blex_tuple)) From 0bf7a6e4549ca1de9715adbfe4530673b2824496 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Sun, 4 Apr 2021 20:38:20 -0500 Subject: [PATCH 235/460] add sched/sio test with vec+barrier --- test/test_linearization_checker.py | 219 ++++++++++++++++++++++++++++- 1 file changed, 217 insertions(+), 2 deletions(-) diff --git a/test/test_linearization_checker.py b/test/test_linearization_checker.py index fa197bccd..8f6ccb616 100644 --- a/test/test_linearization_checker.py +++ b/test/test_linearization_checker.py @@ -740,7 +740,7 @@ def test_sios_and_schedules_with_barriers(): pworders = get_pairwise_statement_orderings( lin_knl, linearization_items, insn_id_pairs) - # Relationship between j1 and 2 -------------------------------------------- + # {{{ Relationship between j1 and 2 # Create expected maps and compare @@ -900,7 +900,9 @@ def test_sios_and_schedules_with_barriers(): assert not unwanted_pairs.is_subset(order_info.sio_intra_group) - # Relationship between 1 and i0 -------------------------------------------- + # }}} + + # {{{ Relationship between 1 and i0 # Create expected maps and compare @@ -1001,6 +1003,219 @@ def test_sios_and_schedules_with_barriers(): sched_after_global_exp=sched_after_global_exp, ) + # }}} + +# }}} + + +# {{{ SIOs and schedules with vec tag + +def test_sios_and_schedules_with_vec_and_barriers(): + from loopy.schedule.checker import ( + get_pairwise_statement_orderings, + ) + + knl = lp.make_kernel( + "{[i, j, l0] : 0 <= i < 4 and 0 <= j < n and 0 <= l0 < 32}", + """ + for l0 + for i + for j + b[i,j,l0] = 1 {id=s1} + ... 
lbarrier {id=b,dep=s1} + c[i,j,l0] = 2 {id=s2, dep=b} + end + end + end + """) + knl = lp.add_and_infer_dtypes(knl, {"b": "float32", "c": "float32"}) + + knl = lp.tag_inames(knl, {"i": "vec", "l0": "l.0"}) + + # Get a linearization + proc_knl = preprocess_kernel(knl) + lin_knl = get_one_linearized_kernel(proc_knl) + linearization_items = lin_knl.linearization + + insn_id_pairs = [("s1", "s2")] + pworders = get_pairwise_statement_orderings( + lin_knl, linearization_items, insn_id_pairs) + + # {{{ Relationship between s1 and s2 + + # Create expected maps and compare + + # Iname bound strings to facilitate creation of expected maps + iname_bound_str = "0 <= i < 4 and 0 <= j < n" + iname_bound_str_p = "0 <= i' < 4 and 0 <= j' < n" + conc_iname_bound_str = "0 <= l0 < 32" + conc_iname_bound_str_p = "0 <= l0' < 32" + + # {{{ Intra-thread + + sched_s1_intra_thread_exp = isl.Map( + "[n] -> {" + "[%s=0, i, j, l0] -> [%s] : " + "%s and %s}" # iname bounds + % ( + STATEMENT_VAR_NAME, + _lex_point_string( + ["j", "0"], # lex points (initial matching dim gets removed) + lid_inames=["l0"], + ), + iname_bound_str, + conc_iname_bound_str, + ) + ) + + sched_s2_intra_thread_exp = isl.Map( + "[n] -> {" + "[%s=1, i, j, l0] -> [%s] : " + "%s and %s}" # iname bounds + % ( + STATEMENT_VAR_NAME, + _lex_point_string( + ["j", "1"], # lex points (initial matching dim gets removed) + lid_inames=["l0"], + ), + iname_bound_str, + conc_iname_bound_str, + ) + ) + + sio_intra_thread_exp = _isl_map_with_marked_dims( + "[n] -> {{ " + "[{0}'=0, i', j', l0'] -> [{0}=1, i, j, l0] : " + "j' <= j " + "and l0 = l0' " # within a single thread + "and {1} and {2} and {3} and {4}" # iname bounds + "}}".format( + STATEMENT_VAR_NAME, + iname_bound_str, + iname_bound_str_p, + conc_iname_bound_str, + conc_iname_bound_str_p, + ) + ) + + # }}} + + # {{{ Intra-group + + # Intra-group scheds would be same due to lbarrier, + # but since lex tuples are not simplified in intra-group/global + # cases, there's an extra lex 
dim: + + sched_s1_intra_group_exp = isl.Map( + "[n] -> {" + "[%s=0, i, j, l0] -> [%s] : " + "%s and %s}" # iname bounds + % ( + STATEMENT_VAR_NAME, + _lex_point_string( + ["1", "j", "0"], # lex points + lid_inames=["l0"], + ), + iname_bound_str, + conc_iname_bound_str, + ) + ) + + sched_s2_intra_group_exp = isl.Map( + "[n] -> {" + "[%s=1, i, j, l0] -> [%s] : " + "%s and %s}" # iname bounds + % ( + STATEMENT_VAR_NAME, + _lex_point_string( + ["1", "j", "1"], # lex points + lid_inames=["l0"], + ), + iname_bound_str, + conc_iname_bound_str, + ) + ) + + sio_intra_group_exp = _isl_map_with_marked_dims( + "[n] -> {{ " + "[{0}'=0, i', j', l0'] -> [{0}=1, i, j, l0] : " + "j' <= j " + "and {1} and {2} and {3} and {4}" # iname bounds + "}}".format( + STATEMENT_VAR_NAME, + iname_bound_str, + iname_bound_str_p, + conc_iname_bound_str, + conc_iname_bound_str_p, + ) + ) + + # }}} + + # {{{ Global + + sched_s1_global_exp = isl.Map( + "[n] -> {" + "[%s=0, i, j, l0] -> [%s] : " + "%s and %s}" # iname bounds + % ( + STATEMENT_VAR_NAME, + _lex_point_string( + ["0"], # lex points + lid_inames=["l0"], + ), + iname_bound_str, + conc_iname_bound_str, + ) + ) + + # (same as s1 except for statement id because no global barriers) + sched_s2_global_exp = isl.Map( + "[n] -> {" + "[%s=1, i, j, l0] -> [%s] : " + "%s and %s}" # iname bounds + % ( + STATEMENT_VAR_NAME, + _lex_point_string( + ["0"], # lex points + lid_inames=["l0"], + ), + iname_bound_str, + conc_iname_bound_str, + ) + ) + + sio_global_exp = _isl_map_with_marked_dims( + "[n] -> {{ " + "[{0}'=0, i', j', l0'] -> [{0}=1, i, j, l0] : " + "False " + "and {1} and {2} and {3} and {4}" # iname bounds + "}}".format( + STATEMENT_VAR_NAME, + iname_bound_str, + iname_bound_str_p, + conc_iname_bound_str, + conc_iname_bound_str_p, + ) + ) + + # }}} + + _check_orderings_for_stmt_pair( + "s1", "s2", pworders, + sio_intra_thread_exp=sio_intra_thread_exp, + sched_before_intra_thread_exp=sched_s1_intra_thread_exp, + 
sched_after_intra_thread_exp=sched_s2_intra_thread_exp, + sio_intra_group_exp=sio_intra_group_exp, + sched_before_intra_group_exp=sched_s1_intra_group_exp, + sched_after_intra_group_exp=sched_s2_intra_group_exp, + sio_global_exp=sio_global_exp, + sched_before_global_exp=sched_s1_global_exp, + sched_after_global_exp=sched_s2_global_exp, + ) + + # }}} + # }}} From 81dcaf7c640a9d2399cfcc86d5c81d5d3bbe9bff Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Sun, 4 Apr 2021 21:21:39 -0500 Subject: [PATCH 236/460] improve formatting and code readibility --- test/test_linearization_checker.py | 363 ++++++++++++++++------------- 1 file changed, 205 insertions(+), 158 deletions(-) diff --git a/test/test_linearization_checker.py b/test/test_linearization_checker.py index 8f6ccb616..7c2272c82 100644 --- a/test/test_linearization_checker.py +++ b/test/test_linearization_checker.py @@ -50,7 +50,7 @@ logger = logging.getLogger(__name__) -# {{{ helper functions for map creation/handling +# {{{ Helper functions for map creation/handling def _align_and_compare_maps(maps): from loopy.schedule.checker.utils import prettier_map_string @@ -88,12 +88,51 @@ def _isl_map_with_marked_dims(s): # Isl ignores the apostrophes in map strings, until they are explicitly added return append_marker_to_isl_map_var_names(isl.Map(s), dt.in_, BEFORE_MARK) + +def _check_orderings_for_stmt_pair( + stmt_id_before, + stmt_id_after, + all_sios, + sio_intra_thread_exp=None, + sched_before_intra_thread_exp=None, + sched_after_intra_thread_exp=None, + sio_intra_group_exp=None, + sched_before_intra_group_exp=None, + sched_after_intra_group_exp=None, + sio_global_exp=None, + sched_before_global_exp=None, + sched_after_global_exp=None, + ): + + order_info = all_sios[(stmt_id_before, stmt_id_after)] + + # Get pairs of maps to compare for equality + map_candidates = zip([ + sio_intra_thread_exp, + sched_before_intra_thread_exp, sched_after_intra_thread_exp, + sio_intra_group_exp, + sched_before_intra_group_exp, 
sched_after_intra_group_exp, + sio_global_exp, + sched_before_global_exp, sched_after_global_exp, + ], [ + order_info.sio_intra_thread, + order_info.pwsched_intra_thread[0], order_info.pwsched_intra_thread[1], + order_info.sio_intra_group, + order_info.pwsched_intra_group[0], order_info.pwsched_intra_group[1], + order_info.sio_global, + order_info.pwsched_global[0], order_info.pwsched_global[1], + ]) + + # Only compare to maps that were passed + maps_to_compare = [(m1, m2) for m1, m2 in map_candidates if m1 is not None] + _align_and_compare_maps(maps_to_compare) + # }}} -# {{{ test pairwise schedule creation +# {{{ test_intra_thread_pairwise_schedule_creation() -def test_pairwise_schedule_creation(): +def test_intra_thread_pairwise_schedule_creation(): from loopy.schedule.checker import ( get_pairwise_statement_orderings, ) @@ -136,7 +175,7 @@ def test_pairwise_schedule_creation(): lin_knl = get_one_linearized_kernel(proc_knl) linearization_items = lin_knl.linearization - insn_id_pairs = [ + stmt_id_pairs = [ ("stmt_a", "stmt_b"), ("stmt_a", "stmt_c"), ("stmt_a", "stmt_d"), @@ -147,14 +186,14 @@ def test_pairwise_schedule_creation(): pworders = get_pairwise_statement_orderings( lin_knl, linearization_items, - insn_id_pairs, + stmt_id_pairs, ) - # Relationship between stmt_a and stmt_b --------------------------------------- + # {{{ Relationship between stmt_a and stmt_b # Create expected maps and compare - sched_before_intra_thread_exp = isl.Map( + sched_stmt_a_intra_thread_exp = isl.Map( "[pi, pk] -> { [%s=0, i, k] -> [%s] : 0 <= i < pi and 0 <= k < pk }" % ( STATEMENT_VAR_NAME, @@ -162,7 +201,7 @@ def test_pairwise_schedule_creation(): ) ) - sched_after_intra_thread_exp = isl.Map( + sched_stmt_b_intra_thread_exp = isl.Map( "[pi, pj] -> { [%s=1, i, j] -> [%s] : 0 <= i < pi and 0 <= j < pj }" % ( STATEMENT_VAR_NAME, @@ -172,16 +211,17 @@ def test_pairwise_schedule_creation(): _check_orderings_for_stmt_pair( "stmt_a", "stmt_b", pworders, - 
sched_before_intra_thread_exp=sched_before_intra_thread_exp, - sched_after_intra_thread_exp=sched_after_intra_thread_exp, + sched_before_intra_thread_exp=sched_stmt_a_intra_thread_exp, + sched_after_intra_thread_exp=sched_stmt_b_intra_thread_exp, ) - # ------------------------------------------------------------------------------ - # Relationship between stmt_a and stmt_c --------------------------------------- + # }}} + + # {{{ Relationship between stmt_a and stmt_c # Create expected maps and compare - sched_before_intra_thread_exp = isl.Map( + sched_stmt_a_intra_thread_exp = isl.Map( "[pi, pk] -> { [%s=0, i, k] -> [%s] : 0 <= i < pi and 0 <= k < pk }" % ( STATEMENT_VAR_NAME, @@ -189,7 +229,7 @@ def test_pairwise_schedule_creation(): ) ) - sched_after_intra_thread_exp = isl.Map( + sched_stmt_c_intra_thread_exp = isl.Map( "[pi, pj] -> { [%s=1, i, j] -> [%s] : 0 <= i < pi and 0 <= j < pj }" % ( STATEMENT_VAR_NAME, @@ -199,16 +239,17 @@ def test_pairwise_schedule_creation(): _check_orderings_for_stmt_pair( "stmt_a", "stmt_c", pworders, - sched_before_intra_thread_exp=sched_before_intra_thread_exp, - sched_after_intra_thread_exp=sched_after_intra_thread_exp, + sched_before_intra_thread_exp=sched_stmt_a_intra_thread_exp, + sched_after_intra_thread_exp=sched_stmt_c_intra_thread_exp, ) - # ------------------------------------------------------------------------------ - # Relationship between stmt_a and stmt_d --------------------------------------- + # }}} + + # {{{ Relationship between stmt_a and stmt_d # Create expected maps and compare - sched_before_intra_thread_exp = isl.Map( + sched_stmt_a_intra_thread_exp = isl.Map( "[pi, pk] -> { [%s=0, i, k] -> [%s] : 0 <= i < pi and 0 <= k < pk }" % ( STATEMENT_VAR_NAME, @@ -216,7 +257,7 @@ def test_pairwise_schedule_creation(): ) ) - sched_after_intra_thread_exp = isl.Map( + sched_stmt_d_intra_thread_exp = isl.Map( "[pt] -> { [%s=1, t] -> [%s] : 0 <= t < pt }" % ( STATEMENT_VAR_NAME, @@ -226,16 +267,17 @@ def 
test_pairwise_schedule_creation(): _check_orderings_for_stmt_pair( "stmt_a", "stmt_d", pworders, - sched_before_intra_thread_exp=sched_before_intra_thread_exp, - sched_after_intra_thread_exp=sched_after_intra_thread_exp, + sched_before_intra_thread_exp=sched_stmt_a_intra_thread_exp, + sched_after_intra_thread_exp=sched_stmt_d_intra_thread_exp, ) - # ------------------------------------------------------------------------------ - # Relationship between stmt_b and stmt_c --------------------------------------- + # }}} + + # {{{ Relationship between stmt_b and stmt_c # Create expected maps and compare - sched_before_intra_thread_exp = isl.Map( + sched_stmt_b_intra_thread_exp = isl.Map( "[pi, pj] -> { [%s=0, i, j] -> [%s] : 0 <= i < pi and 0 <= j < pj }" % ( STATEMENT_VAR_NAME, @@ -243,7 +285,7 @@ def test_pairwise_schedule_creation(): ) ) - sched_after_intra_thread_exp = isl.Map( + sched_stmt_c_intra_thread_exp = isl.Map( "[pi, pj] -> { [%s=1, i, j] -> [%s] : 0 <= i < pi and 0 <= j < pj }" % ( STATEMENT_VAR_NAME, @@ -253,16 +295,17 @@ def test_pairwise_schedule_creation(): _check_orderings_for_stmt_pair( "stmt_b", "stmt_c", pworders, - sched_before_intra_thread_exp=sched_before_intra_thread_exp, - sched_after_intra_thread_exp=sched_after_intra_thread_exp, + sched_before_intra_thread_exp=sched_stmt_b_intra_thread_exp, + sched_after_intra_thread_exp=sched_stmt_c_intra_thread_exp, ) - # ------------------------------------------------------------------------------ - # Relationship between stmt_b and stmt_d --------------------------------------- + # }}} + + # {{{ Relationship between stmt_b and stmt_d # Create expected maps and compare - sched_before_intra_thread_exp = isl.Map( + sched_stmt_b_intra_thread_exp = isl.Map( "[pi, pj] -> { [%s=0, i, j] -> [%s] : 0 <= i < pi and 0 <= j < pj }" % ( STATEMENT_VAR_NAME, @@ -270,7 +313,7 @@ def test_pairwise_schedule_creation(): ) ) - sched_after_intra_thread_exp = isl.Map( + sched_stmt_d_intra_thread_exp = isl.Map( "[pt] -> { 
[%s=1, t] -> [%s] : 0 <= t < pt }" % ( STATEMENT_VAR_NAME, @@ -280,16 +323,17 @@ def test_pairwise_schedule_creation(): _check_orderings_for_stmt_pair( "stmt_b", "stmt_d", pworders, - sched_before_intra_thread_exp=sched_before_intra_thread_exp, - sched_after_intra_thread_exp=sched_after_intra_thread_exp, + sched_before_intra_thread_exp=sched_stmt_b_intra_thread_exp, + sched_after_intra_thread_exp=sched_stmt_d_intra_thread_exp, ) - # ------------------------------------------------------------------------------ - # Relationship between stmt_c and stmt_d --------------------------------------- + # }}} + + # {{{ Relationship between stmt_c and stmt_d # Create expected maps and compare - sched_before_intra_thread_exp = isl.Map( + sched_stmt_c_intra_thread_exp = isl.Map( "[pi, pj] -> { [%s=0, i, j] -> [%s] : 0 <= i < pi and 0 <= j < pj }" % ( STATEMENT_VAR_NAME, @@ -297,7 +341,7 @@ def test_pairwise_schedule_creation(): ) ) - sched_after_intra_thread_exp = isl.Map( + sched_stmt_d_intra_thread_exp = isl.Map( "[pt] -> { [%s=1, t] -> [%s] : 0 <= t < pt }" % ( STATEMENT_VAR_NAME, @@ -307,12 +351,20 @@ def test_pairwise_schedule_creation(): _check_orderings_for_stmt_pair( "stmt_c", "stmt_d", pworders, - sched_before_intra_thread_exp=sched_before_intra_thread_exp, - sched_after_intra_thread_exp=sched_after_intra_thread_exp, + sched_before_intra_thread_exp=sched_stmt_c_intra_thread_exp, + sched_after_intra_thread_exp=sched_stmt_d_intra_thread_exp, ) + # }}} + +# }}} + + +# {{{ test_pairwise_schedule_creation_with_hw_par_tags() def test_pairwise_schedule_creation_with_hw_par_tags(): + # (further sched testing in SIO tests below) + from loopy.schedule.checker import ( get_pairwise_statement_orderings, ) @@ -356,11 +408,11 @@ def test_pairwise_schedule_creation_with_hw_par_tags(): stmt_id_pairs, ) - # Relationship between stmt_a and stmt_b --------------------------------------- + # {{{ Relationship between stmt_a and stmt_b # Create expected maps and compare - 
sched_before_intra_thread_exp = isl.Map( + sched_stmt_a_intra_thread_exp = isl.Map( "[pi,pj] -> {[%s=0,i,ii,j,jj] -> [%s] : 0 <= i,ii < pi and 0 <= j,jj < pj}" % ( STATEMENT_VAR_NAME, @@ -371,7 +423,7 @@ def test_pairwise_schedule_creation_with_hw_par_tags(): ) ) - sched_after_intra_thread_exp = isl.Map( + sched_stmt_b_intra_thread_exp = isl.Map( "[pi,pj] -> {[%s=1,i,ii,j,jj] -> [%s] : 0 <= i,ii < pi and 0 <= j,jj < pj}" % ( STATEMENT_VAR_NAME, @@ -384,16 +436,16 @@ def test_pairwise_schedule_creation_with_hw_par_tags(): _check_orderings_for_stmt_pair( "stmt_a", "stmt_b", pworders, - sched_before_intra_thread_exp=sched_before_intra_thread_exp, - sched_after_intra_thread_exp=sched_after_intra_thread_exp, + sched_before_intra_thread_exp=sched_stmt_a_intra_thread_exp, + sched_after_intra_thread_exp=sched_stmt_b_intra_thread_exp, ) - # ------------------------------------------------------------------------------ + # }}} # }}} -# {{{ test lex order map creation +# {{{ test_lex_order_map_creation() def test_lex_order_map_creation(): from loopy.schedule.checker.lexicographic_order_map import ( @@ -441,48 +493,9 @@ def _check_lex_map(exp_lex_order_map, n_dims): # }}} -# {{{ test statement instance ordering creation +# {{{ test_intra_thread_statement_instance_ordering() -def _check_orderings_for_stmt_pair( - stmt_id_before, - stmt_id_after, - all_sios, - sio_intra_thread_exp=None, - sched_before_intra_thread_exp=None, - sched_after_intra_thread_exp=None, - sio_intra_group_exp=None, - sched_before_intra_group_exp=None, - sched_after_intra_group_exp=None, - sio_global_exp=None, - sched_before_global_exp=None, - sched_after_global_exp=None, - ): - - order_info = all_sios[(stmt_id_before, stmt_id_after)] - - # Get pairs of maps to compare for equality - map_candidates = zip([ - sio_intra_thread_exp, - sched_before_intra_thread_exp, sched_after_intra_thread_exp, - sio_intra_group_exp, - sched_before_intra_group_exp, sched_after_intra_group_exp, - sio_global_exp, - 
sched_before_global_exp, sched_after_global_exp, - ], [ - order_info.sio_intra_thread, - order_info.pwsched_intra_thread[0], order_info.pwsched_intra_thread[1], - order_info.sio_intra_group, - order_info.pwsched_intra_group[0], order_info.pwsched_intra_group[1], - order_info.sio_global, - order_info.pwsched_global[0], order_info.pwsched_global[1], - ]) - - # Only compare to maps that were passed - maps_to_compare = [(m1, m2) for m1, m2 in map_candidates if m1 is not None] - _align_and_compare_maps(maps_to_compare) - - -def test_statement_instance_ordering(): +def test_intra_thread_statement_instance_ordering(): from loopy.schedule.checker import ( get_pairwise_statement_orderings, ) @@ -539,7 +552,7 @@ def test_statement_instance_ordering(): stmt_id_pairs, ) - # Relationship between stmt_a and stmt_b --------------------------------------- + # {{{ Relationship between stmt_a and stmt_b sio_intra_thread_exp = _isl_map_with_marked_dims( "[pi, pj, pk] -> {{ " @@ -551,7 +564,9 @@ def test_statement_instance_ordering(): _check_orderings_for_stmt_pair( "stmt_a", "stmt_b", pworders, sio_intra_thread_exp=sio_intra_thread_exp) - # Relationship between stmt_a and stmt_c --------------------------------------- + # }}} + + # {{{ Relationship between stmt_a and stmt_c sio_intra_thread_exp = _isl_map_with_marked_dims( "[pi, pj, pk] -> {{ " @@ -563,7 +578,9 @@ def test_statement_instance_ordering(): _check_orderings_for_stmt_pair( "stmt_a", "stmt_c", pworders, sio_intra_thread_exp=sio_intra_thread_exp) - # Relationship between stmt_a and stmt_d --------------------------------------- + # }}} + + # {{{ Relationship between stmt_a and stmt_d sio_intra_thread_exp = _isl_map_with_marked_dims( "[pt, pi, pk] -> {{ " @@ -575,7 +592,9 @@ def test_statement_instance_ordering(): _check_orderings_for_stmt_pair( "stmt_a", "stmt_d", pworders, sio_intra_thread_exp=sio_intra_thread_exp) - # Relationship between stmt_b and stmt_c --------------------------------------- + # }}} + + # {{{ 
Relationship between stmt_b and stmt_c sio_intra_thread_exp = _isl_map_with_marked_dims( "[pi, pj] -> {{ " @@ -589,7 +608,9 @@ def test_statement_instance_ordering(): _check_orderings_for_stmt_pair( "stmt_b", "stmt_c", pworders, sio_intra_thread_exp=sio_intra_thread_exp) - # Relationship between stmt_b and stmt_d --------------------------------------- + # }}} + + # {{{ Relationship between stmt_b and stmt_d sio_intra_thread_exp = _isl_map_with_marked_dims( "[pt, pi, pj] -> {{ " @@ -601,7 +622,9 @@ def test_statement_instance_ordering(): _check_orderings_for_stmt_pair( "stmt_b", "stmt_d", pworders, sio_intra_thread_exp=sio_intra_thread_exp) - # Relationship between stmt_c and stmt_d --------------------------------------- + # }}} + + # {{{ Relationship between stmt_c and stmt_d sio_intra_thread_exp = _isl_map_with_marked_dims( "[pt, pi, pj] -> {{ " @@ -613,6 +636,12 @@ def test_statement_instance_ordering(): _check_orderings_for_stmt_pair( "stmt_c", "stmt_d", pworders, sio_intra_thread_exp=sio_intra_thread_exp) + # }}} + +# }}} + + +# {{{ test_statement_instance_ordering_with_hw_par_tags() def test_statement_instance_ordering_with_hw_par_tags(): from loopy.schedule.checker import ( @@ -667,7 +696,7 @@ def test_statement_instance_ordering_with_hw_par_tags(): par_iname_condition = " and ".join( "{0} = {0}'".format(iname) for iname in conc_inames) - # Relationship between stmt_a and stmt_b --------------------------------------- + # {{{ Relationship between stmt_a and stmt_b sio_intra_thread_exp = _isl_map_with_marked_dims( "[pi, pj] -> {{ " @@ -683,12 +712,12 @@ def test_statement_instance_ordering_with_hw_par_tags(): _check_orderings_for_stmt_pair( "stmt_a", "stmt_b", pworders, sio_intra_thread_exp=sio_intra_thread_exp) - # ------------------------------------------------------------------------------ + # }}} # }}} -# {{{ SIOs and schedules with barriers +# {{{ test_sios_and_schedules_with_barriers() def test_sios_and_schedules_with_barriers(): from 
loopy.schedule.checker import ( @@ -705,22 +734,22 @@ def test_sios_and_schedules_with_barriers(): for g0 for l0 for l1 - <>temp0 = 0 {id=0} - ... lbarrier {id=b0,dep=0} - <>temp1 = 1 {id=1,dep=b0} + <>temp0 = 0 {id=stmt_0} + ... lbarrier {id=stmt_b0,dep=stmt_0} + <>temp1 = 1 {id=stmt_1,dep=stmt_b0} for i - <>tempi0 = 0 {id=i0,dep=1} - ... lbarrier {id=ib0,dep=i0} - ... gbarrier {id=ibb0,dep=i0} - <>tempi1 = 0 {id=i1,dep=ib0} - <>tempi2 = 0 {id=i2,dep=i1} + <>tempi0 = 0 {id=stmt_i0,dep=stmt_1} + ... lbarrier {id=stmt_ib0,dep=stmt_i0} + ... gbarrier {id=stmt_ibb0,dep=stmt_i0} + <>tempi1 = 0 {id=stmt_i1,dep=stmt_ib0} + <>tempi2 = 0 {id=stmt_i2,dep=stmt_i1} for j - <>tempj0 = 0 {id=j0,dep=i2} - ... lbarrier {id=jb0,dep=j0} - <>tempj1 = 0 {id=j1,dep=jb0} + <>tempj0 = 0 {id=stmt_j0,dep=stmt_i2} + ... lbarrier {id=stmt_jb0,dep=stmt_j0} + <>tempj1 = 0 {id=stmt_j1,dep=stmt_jb0} end end - <>temp2 = 0 {id=2,dep=i0} + <>temp2 = 0 {id=stmt_2,dep=stmt_i0} end end end @@ -736,11 +765,11 @@ def test_sios_and_schedules_with_barriers(): lin_knl = get_one_linearized_kernel(proc_knl) linearization_items = lin_knl.linearization - insn_id_pairs = [("j1", "2"), ("1", "i0")] + stmt_id_pairs = [("stmt_j1", "stmt_2"), ("stmt_1", "stmt_i0")] pworders = get_pairwise_statement_orderings( - lin_knl, linearization_items, insn_id_pairs) + lin_knl, linearization_items, stmt_id_pairs) - # {{{ Relationship between j1 and 2 + # {{{ Relationship between stmt_j1 and stmt_2 # Create expected maps and compare @@ -750,7 +779,9 @@ def test_sios_and_schedules_with_barriers(): conc_iname_bound_str = "0 <= l0,l1,g0 < lg_end" conc_iname_bound_str_p = "0 <= l0',l1',g0' < lg_end" - sched_before_intra_group_exp = isl.Map( + # {{{ Intra-group + + sched_stmt_j1_intra_group_exp = isl.Map( "[ij_start, ij_end, lg_end] -> {" "[%s=0, i, j, l0, l1, g0] -> [%s] : " "%s and %s}" # iname bounds @@ -765,7 +796,7 @@ def test_sios_and_schedules_with_barriers(): ) ) - sched_after_intra_group_exp = isl.Map( + 
sched_stmt_2_intra_group_exp = isl.Map( "[lg_end] -> {[%s=1, l0, l1, g0] -> [%s] : %s}" % ( STATEMENT_VAR_NAME, @@ -794,7 +825,11 @@ def test_sios_and_schedules_with_barriers(): ) ) - sched_before_global_exp = isl.Map( + # }}} + + # {{{ Global + + sched_stmt_j1_global_exp = isl.Map( "[ij_start, ij_end, lg_end] -> {" "[%s=0, i, j, l0, l1, g0] -> [%s] : " "%s and %s}" # iname bounds @@ -809,7 +844,7 @@ def test_sios_and_schedules_with_barriers(): ) ) - sched_after_global_exp = isl.Map( + sched_stmt_2_global_exp = isl.Map( "[lg_end] -> {[%s=1, l0, l1, g0] -> [%s] : " "%s}" # iname bounds % ( @@ -837,24 +872,26 @@ def test_sios_and_schedules_with_barriers(): ) ) + # }}} + _check_orderings_for_stmt_pair( - "j1", "2", pworders, + "stmt_j1", "stmt_2", pworders, sio_intra_group_exp=sio_intra_group_exp, - sched_before_intra_group_exp=sched_before_intra_group_exp, - sched_after_intra_group_exp=sched_after_intra_group_exp, + sched_before_intra_group_exp=sched_stmt_j1_intra_group_exp, + sched_after_intra_group_exp=sched_stmt_2_intra_group_exp, sio_global_exp=sio_global_exp, - sched_before_global_exp=sched_before_global_exp, - sched_after_global_exp=sched_after_global_exp, + sched_before_global_exp=sched_stmt_j1_global_exp, + sched_after_global_exp=sched_stmt_2_global_exp, ) - # Check for some key example pairs in the sio_intra_group map + # {{{ Check for some key example pairs in the sio_intra_group map # Get maps - order_info = pworders[("j1", "2")] + order_info = pworders[("stmt_j1", "stmt_2")] # As long as this is not the last iteration of the i loop, then there - # should be a barrier between the last instance of statement j1 - # and statement 2: + # should be a barrier between the last instance of statement stmt_j1 + # and statement stmt_2: ij_end_val = 7 last_i_val = ij_end_val - 1 max_non_last_i_val = last_i_val - 1 # max i val that isn't the last iteration @@ -879,8 +916,8 @@ def test_sios_and_schedules_with_barriers(): assert 
wanted_pairs.is_subset(order_info.sio_intra_group) # If this IS the last iteration of the i loop, then there - # should NOT be a barrier between the last instance of statement j1 - # and statement 2: + # should NOT be a barrier between the last instance of statement stmt_j1 + # and statement stmt_2: unwanted_pairs = _isl_map_with_marked_dims( "[ij_start, ij_end, lg_end] -> {{" "[{0}' = 0, i', j'=ij_end-1, g0', l0', l1'] -> [{0} = 1, l0, l1, g0] : " @@ -902,11 +939,15 @@ def test_sios_and_schedules_with_barriers(): # }}} - # {{{ Relationship between 1 and i0 + # }}} + + # {{{ Relationship between stmt_1 and stmt_i0 # Create expected maps and compare - sched_before_intra_group_exp = isl.Map( + # {{{ Intra-group + + sched_stmt_1_intra_group_exp = isl.Map( "[lg_end] -> {[%s=0, l0, l1, g0] -> [%s] : " "%s}" # iname bounds % ( @@ -919,7 +960,7 @@ def test_sios_and_schedules_with_barriers(): ) ) - sched_after_intra_group_exp = isl.Map( + sched_stmt_i0_intra_group_exp = isl.Map( "[ij_start, ij_end, lg_end] -> {" "[%s=1, i, j, l0, l1, g0] -> [%s] : " "%s and %s}" # iname bounds @@ -950,7 +991,11 @@ def test_sios_and_schedules_with_barriers(): ) ) - sched_before_global_exp = isl.Map( + # }}} + + # {{{ Global + + sched_stmt_1_global_exp = isl.Map( "[lg_end] -> {[%s=0, l0, l1, g0] -> [%s] : " "%s}" # iname bounds % ( @@ -963,7 +1008,7 @@ def test_sios_and_schedules_with_barriers(): ) ) - sched_after_global_exp = isl.Map( + sched_stmt_i0_global_exp = isl.Map( "[ij_start, ij_end, lg_end] -> {" "[%s=1, i, j, l0, l1, g0] -> [%s] : " "%s and %s}" # iname bounds @@ -993,14 +1038,16 @@ def test_sios_and_schedules_with_barriers(): ) ) + # }}} + _check_orderings_for_stmt_pair( - "1", "i0", pworders, + "stmt_1", "stmt_i0", pworders, sio_intra_group_exp=sio_intra_group_exp, - sched_before_intra_group_exp=sched_before_intra_group_exp, - sched_after_intra_group_exp=sched_after_intra_group_exp, + sched_before_intra_group_exp=sched_stmt_1_intra_group_exp, + 
sched_after_intra_group_exp=sched_stmt_i0_intra_group_exp, sio_global_exp=sio_global_exp, - sched_before_global_exp=sched_before_global_exp, - sched_after_global_exp=sched_after_global_exp, + sched_before_global_exp=sched_stmt_1_global_exp, + sched_after_global_exp=sched_stmt_i0_global_exp, ) # }}} @@ -1008,7 +1055,7 @@ def test_sios_and_schedules_with_barriers(): # }}} -# {{{ SIOs and schedules with vec tag +# {{{ test_sios_and_schedules_with_vec_and_barriers() def test_sios_and_schedules_with_vec_and_barriers(): from loopy.schedule.checker import ( @@ -1021,9 +1068,9 @@ def test_sios_and_schedules_with_vec_and_barriers(): for l0 for i for j - b[i,j,l0] = 1 {id=s1} - ... lbarrier {id=b,dep=s1} - c[i,j,l0] = 2 {id=s2, dep=b} + b[i,j,l0] = 1 {id=stmt_1} + ... lbarrier {id=b,dep=stmt_1} + c[i,j,l0] = 2 {id=stmt_2, dep=b} end end end @@ -1037,11 +1084,11 @@ def test_sios_and_schedules_with_vec_and_barriers(): lin_knl = get_one_linearized_kernel(proc_knl) linearization_items = lin_knl.linearization - insn_id_pairs = [("s1", "s2")] + stmt_id_pairs = [("stmt_1", "stmt_2")] pworders = get_pairwise_statement_orderings( - lin_knl, linearization_items, insn_id_pairs) + lin_knl, linearization_items, stmt_id_pairs) - # {{{ Relationship between s1 and s2 + # {{{ Relationship between stmt_1 and stmt_2 # Create expected maps and compare @@ -1053,7 +1100,7 @@ def test_sios_and_schedules_with_vec_and_barriers(): # {{{ Intra-thread - sched_s1_intra_thread_exp = isl.Map( + sched_stmt_1_intra_thread_exp = isl.Map( "[n] -> {" "[%s=0, i, j, l0] -> [%s] : " "%s and %s}" # iname bounds @@ -1068,7 +1115,7 @@ def test_sios_and_schedules_with_vec_and_barriers(): ) ) - sched_s2_intra_thread_exp = isl.Map( + sched_stmt_2_intra_thread_exp = isl.Map( "[n] -> {" "[%s=1, i, j, l0] -> [%s] : " "%s and %s}" # iname bounds @@ -1106,7 +1153,7 @@ def test_sios_and_schedules_with_vec_and_barriers(): # but since lex tuples are not simplified in intra-group/global # cases, there's an extra lex dim: - 
sched_s1_intra_group_exp = isl.Map( + sched_stmt_1_intra_group_exp = isl.Map( "[n] -> {" "[%s=0, i, j, l0] -> [%s] : " "%s and %s}" # iname bounds @@ -1121,7 +1168,7 @@ def test_sios_and_schedules_with_vec_and_barriers(): ) ) - sched_s2_intra_group_exp = isl.Map( + sched_stmt_2_intra_group_exp = isl.Map( "[n] -> {" "[%s=1, i, j, l0] -> [%s] : " "%s and %s}" # iname bounds @@ -1154,7 +1201,7 @@ def test_sios_and_schedules_with_vec_and_barriers(): # {{{ Global - sched_s1_global_exp = isl.Map( + sched_stmt_1_global_exp = isl.Map( "[n] -> {" "[%s=0, i, j, l0] -> [%s] : " "%s and %s}" # iname bounds @@ -1169,8 +1216,8 @@ def test_sios_and_schedules_with_vec_and_barriers(): ) ) - # (same as s1 except for statement id because no global barriers) - sched_s2_global_exp = isl.Map( + # (same as stmt_1 except for statement id because no global barriers) + sched_stmt_2_global_exp = isl.Map( "[n] -> {" "[%s=1, i, j, l0] -> [%s] : " "%s and %s}" # iname bounds @@ -1202,16 +1249,16 @@ def test_sios_and_schedules_with_vec_and_barriers(): # }}} _check_orderings_for_stmt_pair( - "s1", "s2", pworders, + "stmt_1", "stmt_2", pworders, sio_intra_thread_exp=sio_intra_thread_exp, - sched_before_intra_thread_exp=sched_s1_intra_thread_exp, - sched_after_intra_thread_exp=sched_s2_intra_thread_exp, + sched_before_intra_thread_exp=sched_stmt_1_intra_thread_exp, + sched_after_intra_thread_exp=sched_stmt_2_intra_thread_exp, sio_intra_group_exp=sio_intra_group_exp, - sched_before_intra_group_exp=sched_s1_intra_group_exp, - sched_after_intra_group_exp=sched_s2_intra_group_exp, + sched_before_intra_group_exp=sched_stmt_1_intra_group_exp, + sched_after_intra_group_exp=sched_stmt_2_intra_group_exp, sio_global_exp=sio_global_exp, - sched_before_global_exp=sched_s1_global_exp, - sched_after_global_exp=sched_s2_global_exp, + sched_before_global_exp=sched_stmt_1_global_exp, + sched_after_global_exp=sched_stmt_2_global_exp, ) # }}} From a6feb6d9bd29204819be08334c78249e359013db Mon Sep 17 00:00:00 2001 
From: jdsteve2 Date: Sun, 4 Apr 2021 22:21:15 -0500 Subject: [PATCH 237/460] return info about unsatisfied deps from check_dependency_satisfaction(); also clean up code/comments --- loopy/schedule/checker/__init__.py | 107 ++++++++++++++++------------- test/test_linearization_checker.py | 7 +- 2 files changed, 62 insertions(+), 52 deletions(-) diff --git a/loopy/schedule/checker/__init__.py b/loopy/schedule/checker/__init__.py index 0fcee4831..4ef952100 100644 --- a/loopy/schedule/checker/__init__.py +++ b/loopy/schedule/checker/__init__.py @@ -26,7 +26,7 @@ def get_pairwise_statement_orderings( knl, lin_items, - insn_id_pairs, + stmt_id_pairs, ): r"""For each statement pair in a subset of all statement pairs found in a linearized kernel, determine the (relative) order in which the statement @@ -44,10 +44,10 @@ def get_pairwise_statement_orderings( this routine during linearization, a truncated (i.e. partial) linearization may be passed through this argument. - :arg insn_id_pairs: A list containing pairs of instruction identifiers. + :arg stmt_id_pairs: A list containing pairs of instruction identifiers. :returns: A dictionary mapping each two-tuple of instruction identifiers - provided in `insn_id_pairs` to a :class:`collections.namedtuple` + provided in `stmt_id_pairs` to a :class:`collections.namedtuple` containing the intra-thread SIO (`sio_intra_thread`), intra-group SIO (`sio_intra_group`), and global SIO (`sio_global`), each realized as an :class:`islpy.Map` from each instance of the first @@ -68,8 +68,8 @@ def get_pairwise_statement_orderings( >>> knl = lp.make_kernel( ... "{[j,k]: 0<=j>> knl = lp.add_and_infer_dtypes(knl, {"a": np.float32, "b": np.float32}) >>> # Get a linearization @@ -79,10 +79,10 @@ def get_pairwise_statement_orderings( >>> sio_dict = get_pairwise_statement_orderings( ... knl, ... knl.linearization, - ... [("insn_a", "insn_b")], + ... [("stmt_a", "stmt_b")], ... 
) >>> # Print map - >>> print(str(sio_dict[("insn_a", "insn_b")].sio_intra_thread + >>> print(str(sio_dict[("stmt_a", "stmt_b")].sio_intra_thread ... ).replace("{ ", "{\n").replace(" :", "\n:")) [pj, pk] -> { [_lp_linchk_stmt' = 0, j', k'] -> [_lp_linchk_stmt = 1, j, k] @@ -129,7 +129,7 @@ def get_pairwise_statement_orderings( return get_pairwise_statement_orderings_inner( knl, lin_items, - insn_id_pairs, + stmt_id_pairs, loops_to_ignore=conc_loop_inames, ) @@ -142,6 +142,7 @@ def check_dependency_satisfaction( knl, linearization_items, ): + # TODO document from loopy.schedule.checker.utils import ( @@ -150,7 +151,7 @@ def check_dependency_satisfaction( # {{{ make sure kernel has been preprocessed - # note: kernels must always be preprocessed before scheduling + # Note: kernels must always be preprocessed before scheduling from loopy.kernel import KernelState assert knl.state in [ KernelState.PREPROCESSED, @@ -162,44 +163,61 @@ def check_dependency_satisfaction( # To minimize time complexity, all pairwise schedules will be created # in one pass, which first requires finding all pairs of statements involved - # in deps. - # So, since we have to find these pairs anyway, collect their deps at - # the same time so we don't have to do it again later during lin checking. - - stmts_to_deps = {} - for insn_after in knl.instructions: - for before_id, dep_list in insn_after.dependencies.items(): - stmts_to_deps.setdefault( - (before_id, insn_after.id), []).extend(dep_list) + # in deps. We will also need to collect the deps for each statement pair, + # so do this at the same time. 
+ + stmt_pairs_to_deps = {} + + # stmt_pairs_to_deps: + # {(stmt_id_before1, stmt_id_after1): [dep1, dep2, ...], + # (stmt_id_before2, stmt_id_after2): [dep1, dep2, ...], + # ...} + + for stmt_after in knl.instructions: + for before_id, dep_list in stmt_after.dependencies.items(): + # (don't compare dep maps to maps found; duplicate deps should be rare) + stmt_pairs_to_deps.setdefault( + (before_id, stmt_after.id), []).extend(dep_list) # }}} + # {{{ Get statement instance orderings + pworders = get_pairwise_statement_orderings( knl, linearization_items, - stmts_to_deps.keys(), + stmt_pairs_to_deps.keys(), ) - # For each dependency, create+test linearization containing pair of insns------ - linearization_is_valid = True - for (insn_id_before, insn_id_after), dependencies in stmts_to_deps.items(): + # }}} + + # {{{ For each depender-dependee pair of statements, check all deps vs. SIO + + deps_are_satisfied = True - # Get pairwise ordering info for stmts involved in the dependency - pworder = pworders[(insn_id_before, insn_id_after)] + # Collect info about unsatisfied deps + unsatisfied_deps = [] + from collections import namedtuple + UnsatisfiedDependencyInfo = namedtuple( + "UnsatisfiedDependencyInfo", + ["statement_pair", "dependency", "statement_ordering"]) - # check each dep for this statement pair + for stmt_id_pair, dependencies in stmt_pairs_to_deps.items(): + + # Get the pairwise ordering info (includes SIOs) + pworder = pworders[stmt_id_pair] + + # Check each dep for this statement pair for dependency in dependencies: - # reorder variables/params in constraint map space to match SIO so we can + # Align constraint map space to match SIO so we can # check to see whether the constraint map is a subset of the SIO - # (spaces must be aligned so that the variables in the constraint map - # correspond to the same variables in the SIO) from loopy.schedule.checker.utils import ( ensure_dim_names_match_and_align, ) - aligned_dep_map = ensure_dim_names_match_and_align( 
dependency, pworder.sio_intra_thread) + # Spaces must match assert aligned_dep_map.space == pworder.sio_intra_thread.space assert aligned_dep_map.space == pworder.sio_intra_group.space assert aligned_dep_map.space == pworder.sio_global.space @@ -210,29 +228,20 @@ def check_dependency_satisfaction( assert (aligned_dep_map.get_var_dict() == pworder.sio_global.get_var_dict()) + # Check dependency if not aligned_dep_map.is_subset( pworder.sio_intra_thread | pworder.sio_intra_group | pworder.sio_global ): - linearization_is_valid = False - - print("================ constraint check failure =================") - print("Constraint map not subset of SIO") - print("Dependencies:") - print(insn_id_before+"->"+insn_id_after) - print(prettier_map_string(dependency)) - print("Statement instance ordering:") - print(prettier_map_string(pworder.sio_intra_thread)) - print("dependency.gist(pworder.sio_intra_thread):") - print(prettier_map_string( - aligned_dep_map.gist(pworder.sio_intra_thread))) - print("pworder.sio_intra_thread.gist(dependency)") - print(prettier_map_string( - pworder.sio_intra_thread.gist(aligned_dep_map))) - print("Loop priority known:") - print(knl.loop_priority) - print("===========================================================") - - return linearization_is_valid + deps_are_satisfied = False + + unsatisfied_deps.append( + UnsatisfiedDependencyInfo(stmt_id_pair, aligned_dep_map, pworder)) + + # Could break here if we don't care about remaining deps + + # }}} + + return deps_are_satisfied, unsatisfied_deps diff --git a/test/test_linearization_checker.py b/test/test_linearization_checker.py index 26075dc97..81000c8ae 100644 --- a/test/test_linearization_checker.py +++ b/test/test_linearization_checker.py @@ -1266,7 +1266,7 @@ def test_sios_and_schedules_with_vec_and_barriers(): # }}} -def test_add_stmt_inst_dependencies(): +def test_add_stmt_inst_dependency(): lp.set_caching_enabled(False) # TODO REMOVE THIS^ (prevents @@ -1374,10 +1374,11 @@ def 
test_add_stmt_inst_dependencies(): lin_knl = get_one_linearized_kernel(proc_knl) lin_items = lin_knl.linearization - linearization_is_valid = lp.check_dependency_satisfaction( + deps_are_satisfied, unsatisfied_deps = lp.check_dependency_satisfaction( proc_knl, lin_items) - assert linearization_is_valid + assert deps_are_satisfied + assert not unsatisfied_deps # TODO create more kernels with valid/invalid linearizations to test checker From daedf107a0ed6a2f02062af9c0d2c00bbc986360 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Sun, 4 Apr 2021 22:29:25 -0500 Subject: [PATCH 238/460] rename check_dependency_satisfaction()->find_unsatisfied_dependencies(); eliminate redundant bool return value for dep satisfaction and instead just check if unsatisfied dep list is empty --- loopy/__init__.py | 4 ++-- loopy/schedule/checker/__init__.py | 20 ++++++++------------ test/test_linearization_checker.py | 3 +-- 3 files changed, 11 insertions(+), 16 deletions(-) diff --git a/loopy/__init__.py b/loopy/__init__.py index e7a66ed29..f94cbe021 100644 --- a/loopy/__init__.py +++ b/loopy/__init__.py @@ -123,7 +123,7 @@ from loopy.schedule import ( generate_loop_schedules, get_one_scheduled_kernel, get_one_linearized_kernel) from loopy.schedule.checker import ( - check_dependency_satisfaction) + find_unsatisfied_dependencies) from loopy.statistics import (ToCountMap, CountGranularity, stringify_stats_mapping, Op, MemAccess, get_op_map, get_mem_access_map, get_synchronization_map, gather_access_footprints, @@ -251,7 +251,7 @@ "preprocess_kernel", "realize_reduction", "generate_loop_schedules", "get_one_scheduled_kernel", "get_one_linearized_kernel", - "check_dependency_satisfaction", + "find_unsatisfied_dependencies", "GeneratedProgram", "CodeGenerationResult", "PreambleInfo", "generate_code", "generate_code_v2", "generate_body", diff --git a/loopy/schedule/checker/__init__.py b/loopy/schedule/checker/__init__.py index 4ef952100..d418047bb 100644 --- a/loopy/schedule/checker/__init__.py 
+++ b/loopy/schedule/checker/__init__.py @@ -138,17 +138,15 @@ def get_pairwise_statement_orderings( # }}} -def check_dependency_satisfaction( +# {{{ find_unsatisfied_dependencies() + +def find_unsatisfied_dependencies( knl, linearization_items, ): # TODO document - from loopy.schedule.checker.utils import ( - prettier_map_string, - ) - # {{{ make sure kernel has been preprocessed # Note: kernels must always be preprocessed before scheduling @@ -192,8 +190,6 @@ def check_dependency_satisfaction( # {{{ For each depender-dependee pair of statements, check all deps vs. SIO - deps_are_satisfied = True - # Collect info about unsatisfied deps unsatisfied_deps = [] from collections import namedtuple @@ -235,13 +231,13 @@ def check_dependency_satisfaction( pworder.sio_global ): - deps_are_satisfied = False - - unsatisfied_deps.append( - UnsatisfiedDependencyInfo(stmt_id_pair, aligned_dep_map, pworder)) + unsatisfied_deps.append(UnsatisfiedDependencyInfo( + stmt_id_pair, aligned_dep_map, pworder)) # Could break here if we don't care about remaining deps # }}} - return deps_are_satisfied, unsatisfied_deps + return unsatisfied_deps + +# }}} diff --git a/test/test_linearization_checker.py b/test/test_linearization_checker.py index 81000c8ae..700c443f5 100644 --- a/test/test_linearization_checker.py +++ b/test/test_linearization_checker.py @@ -1374,10 +1374,9 @@ def test_add_stmt_inst_dependency(): lin_knl = get_one_linearized_kernel(proc_knl) lin_items = lin_knl.linearization - deps_are_satisfied, unsatisfied_deps = lp.check_dependency_satisfaction( + unsatisfied_deps = lp.find_unsatisfied_dependencies( proc_knl, lin_items) - assert deps_are_satisfied assert not unsatisfied_deps From ff0155e75a21063df2b004e8ff1ae7926fbbd361 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Sun, 4 Apr 2021 22:33:30 -0500 Subject: [PATCH 239/460] var renaming instruction->statement --- loopy/schedule/checker/__init__.py | 18 ++++----- loopy/schedule/checker/schedule.py | 62 
+++++++++++++++--------------- 2 files changed, 40 insertions(+), 40 deletions(-) diff --git a/loopy/schedule/checker/__init__.py b/loopy/schedule/checker/__init__.py index 6a2ecb9c5..5a492660b 100644 --- a/loopy/schedule/checker/__init__.py +++ b/loopy/schedule/checker/__init__.py @@ -26,7 +26,7 @@ def get_pairwise_statement_orderings( knl, lin_items, - insn_id_pairs, + stmt_id_pairs, ): r"""For each statement pair in a subset of all statement pairs found in a linearized kernel, determine the (relative) order in which the statement @@ -44,10 +44,10 @@ def get_pairwise_statement_orderings( this routine during linearization, a truncated (i.e. partial) linearization may be passed through this argument. - :arg insn_id_pairs: A list containing pairs of instruction identifiers. + :arg stmt_id_pairs: A list containing pairs of statement identifiers. - :returns: A dictionary mapping each two-tuple of instruction identifiers - provided in `insn_id_pairs` to a :class:`collections.namedtuple` + :returns: A dictionary mapping each two-tuple of statement identifiers + provided in `stmt_id_pairs` to a :class:`collections.namedtuple` containing the intra-thread SIO (`sio_intra_thread`), intra-group SIO (`sio_intra_group`), and global SIO (`sio_global`), each realized as an :class:`islpy.Map` from each instance of the first @@ -68,8 +68,8 @@ def get_pairwise_statement_orderings( >>> knl = lp.make_kernel( ... "{[j,k]: 0<=j>> knl = lp.add_and_infer_dtypes(knl, {"a": np.float32, "b": np.float32}) >>> # Get a linearization @@ -79,10 +79,10 @@ def get_pairwise_statement_orderings( >>> sio_dict = get_pairwise_statement_orderings( ... knl, ... knl.linearization, - ... [("insn_a", "insn_b")], + ... [("stmt_a", "stmt_b")], ... ) >>> # Print map - >>> print(str(sio_dict[("insn_a", "insn_b")].sio_intra_thread + >>> print(str(sio_dict[("stmt_a", "stmt_b")].sio_intra_thread ... 
).replace("{ ", "{\n").replace(" :", "\n:")) [pj, pk] -> { [_lp_linchk_stmt' = 0, j', k'] -> [_lp_linchk_stmt = 1, j, k] @@ -129,7 +129,7 @@ def get_pairwise_statement_orderings( return get_pairwise_statement_orderings_inner( knl, lin_items, - insn_id_pairs, + stmt_id_pairs, loops_to_ignore=conc_loop_inames, ) diff --git a/loopy/schedule/checker/schedule.py b/loopy/schedule/checker/schedule.py index 4a8d1a479..6b509f694 100644 --- a/loopy/schedule/checker/schedule.py +++ b/loopy/schedule/checker/schedule.py @@ -193,7 +193,7 @@ class SpecialLexPointWRTLoop: def get_pairwise_statement_orderings_inner( knl, lin_items, - insn_id_pairs, + stmt_id_pairs, loops_to_ignore=set(), ): r"""For each statement pair in a subset of all statement pairs found in a @@ -216,15 +216,15 @@ def get_pairwise_statement_orderings_inner( truncated (i.e. partial) linearization may be passed through this argument - :arg insn_id_pairs: A list containing pairs of instruction identifiers. + :arg stmt_id_pairs: A list containing pairs of statement identifiers. :arg loops_to_ignore: A set of inames that will be ignored when determining the relative ordering of statements. This will typically contain concurrent inames tagged with the ``vec`` or ``ilp`` array access tags. 
- :returns: A dictionary mapping each two-tuple of instruction identifiers - provided in `insn_id_pairs` to a :class:`collections.namedtuple` + :returns: A dictionary mapping each two-tuple of statement identifiers + provided in `stmt_id_pairs` to a :class:`collections.namedtuple` containing the intra-thread SIO (`sio_intra_thread`), intra-group SIO (`sio_intra_group`), and global SIO (`sio_global`), each realized as an :class:`islpy.Map` from each instance of the first @@ -254,15 +254,15 @@ def get_pairwise_statement_orderings_inner( ) slex = SpecialLexPointWRTLoop - all_insn_ids = set().union(*insn_id_pairs) + all_stmt_ids = set().union(*stmt_id_pairs) # {{{ Intra-thread lex order creation # First, use one pass through lin_items to generate an *intra-thread* # lexicographic ordering describing the relative order of all statements - # represented by all_insn_ids + # represented by all_stmt_ids - # For each statement, map the insn_id to a tuple representing points + # For each statement, map the stmt_id to a tuple representing points # in the intra-thread lexicographic ordering containing items of :class:`int` or # :class:`str` :mod:`loopy` inames stmt_inst_to_lex_intra_thread = {} @@ -317,22 +317,22 @@ def get_pairwise_statement_orderings_inner( # in the simplification step below) elif isinstance(lin_item, RunInstruction): - lp_insn_id = lin_item.insn_id + lp_stmt_id = lin_item.insn_id - # Only process listed insns, otherwise ignore - if lp_insn_id in all_insn_ids: + # Only process listed stmts, otherwise ignore + if lp_stmt_id in all_stmt_ids: # Add item to stmt_inst_to_lex_intra_thread - stmt_inst_to_lex_intra_thread[lp_insn_id] = tuple(next_lex_tuple) + stmt_inst_to_lex_intra_thread[lp_stmt_id] = tuple(next_lex_tuple) # Increment lex dim val enumerating items in current section of code next_lex_tuple[-1] += 1 elif isinstance(lin_item, Barrier): - lp_insn_id = lin_item.originating_insn_id + lp_stmt_id = lin_item.originating_insn_id 
loops_with_barriers[lin_item.synchronization_kind] |= current_inames - if lp_insn_id is None: - # Barriers without insn ids were inserted as a result of a + if lp_stmt_id is None: + # Barriers without stmt ids were inserted as a result of a # dependency. They don't themselves have dependencies. Ignore them. # FIXME: It's possible that we could record metadata about them @@ -341,10 +341,10 @@ def get_pairwise_statement_orderings_inner( continue - # If barrier was identified in listed insns, process it - if lp_insn_id in all_insn_ids: + # If barrier was identified in listed stmts, process it + if lp_stmt_id in all_stmt_ids: # Add item to stmt_inst_to_lex_intra_thread - stmt_inst_to_lex_intra_thread[lp_insn_id] = tuple(next_lex_tuple) + stmt_inst_to_lex_intra_thread[lp_stmt_id] = tuple(next_lex_tuple) # Increment lex dim val enumerating items in current section of code next_lex_tuple[-1] += 1 @@ -739,11 +739,11 @@ def _create_blex_set_from_tuple_pair(before, after, wrap_cond=False): # {{{ _get_map_for_stmt() def _get_map_for_stmt( - insn_id, lex_points, int_sid, lex_dim_names): + stmt_id, lex_points, int_sid, lex_dim_names): # Get inames domain for statement instance (a BasicSet) dom = knl.get_inames_domain( - knl.id_to_insn[insn_id].within_inames) + knl.id_to_insn[stmt_id].within_inames) # (note that this domain may include inames that are # not in stmt.within_inames) @@ -805,18 +805,18 @@ def _get_map_for_stmt( ]) # ("sio" = statement instance ordering; "pwsched" = pairwise schedule) - for insn_ids in insn_id_pairs: + for stmt_ids in stmt_id_pairs: # Determine integer IDs that will represent each statement in mapping # (dependency map creation assumes sid_before=0 and sid_after=1, unless # before and after refer to same stmt, in which case # sid_before=sid_after=0) - int_sids = [0, 0] if insn_ids[0] == insn_ids[1] else [0, 1] + int_sids = [0, 0] if stmt_ids[0] == stmt_ids[1] else [0, 1] # {{{ Create SIO for intra-thread case (lid0' == lid0, gid0' == gid0, etc) # 
Simplify tuples to the extent possible ------------------------------------ - lex_tuples = [stmt_inst_to_lex_intra_thread[insn_id] for insn_id in insn_ids] + lex_tuples = [stmt_inst_to_lex_intra_thread[stmt_id] for stmt_id in stmt_ids] # At this point, one of the lex tuples may have more dimensions than # another; the missing dims are the fastest-updating dims, and their @@ -836,10 +836,10 @@ def _get_map_for_stmt( intra_thread_sched_maps = [ _get_map_for_stmt( - insn_id, lex_tuple, int_sid, + stmt_id, lex_tuple, int_sid, seq_lex_dim_names+all_par_lex_dim_names) - for insn_id, lex_tuple, int_sid - in zip(insn_ids, lex_tuples_simplified, int_sids) + for stmt_id, lex_tuple, int_sid + in zip(stmt_ids, lex_tuples_simplified, int_sids) ] # Create pairwise lex order map (pairwise only in the intra-thread case) @@ -873,17 +873,17 @@ def _get_map_for_stmt( def _get_sched_maps_and_sio( stmt_inst_to_blex, blex_order_map, seq_blex_dim_names): # (Vars from outside func used here: - # insn_ids, int_sids, all_par_lex_dim_names) + # stmt_ids, int_sids, all_par_lex_dim_names) # Use *unsimplified* lex tuples w/ blex map, which are already padded - blex_tuples_padded = [stmt_inst_to_blex[insn_id] for insn_id in insn_ids] + blex_tuples_padded = [stmt_inst_to_blex[stmt_id] for stmt_id in stmt_ids] par_sched_maps = [ _get_map_for_stmt( - insn_id, blex_tuple, int_sid, + stmt_id, blex_tuple, int_sid, seq_blex_dim_names+all_par_lex_dim_names) # all par names - for insn_id, blex_tuple, int_sid - in zip(insn_ids, blex_tuples_padded, int_sids) + for stmt_id, blex_tuple, int_sid + in zip(stmt_ids, blex_tuples_padded, int_sids) ] # Create statement instance ordering @@ -903,7 +903,7 @@ def _get_sched_maps_and_sio( # }}} # Store sched maps along with SIOs - pairwise_sios[tuple(insn_ids)] = StatementOrdering( + pairwise_sios[tuple(stmt_ids)] = StatementOrdering( sio_intra_thread=sio_intra_thread, pwsched_intra_thread=tuple(intra_thread_sched_maps), sio_intra_group=sio_intra_group, From 
0446d3db0bd7476382aa141c3c852fe7f999f1f3 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Sun, 4 Apr 2021 22:37:00 -0500 Subject: [PATCH 240/460] renaming instruction->statement --- loopy/schedule/checker/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/loopy/schedule/checker/__init__.py b/loopy/schedule/checker/__init__.py index 8e0d9db1f..82f9cb5fc 100644 --- a/loopy/schedule/checker/__init__.py +++ b/loopy/schedule/checker/__init__.py @@ -157,7 +157,7 @@ def find_unsatisfied_dependencies( # }}} - # {{{ Create map from dependent instruction id pairs to dependencies + # {{{ Create map from dependent statement id pairs to dependencies # To minimize time complexity, all pairwise schedules will be created # in one pass, which first requires finding all pairs of statements involved From 03fa0c97f43012d604dfa5771931ec0ff5f49481 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Mon, 5 Apr 2021 04:45:38 -0500 Subject: [PATCH 241/460] add update_for_Map to LoopyKeyBuilder --- loopy/tools.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/loopy/tools.py b/loopy/tools.py index 5be4ca6b5..bdba32b90 100644 --- a/loopy/tools.py +++ b/loopy/tools.py @@ -78,6 +78,8 @@ def update_for_BasicSet(self, key_hash, key): # noqa getattr(prn, "print_"+key._base_name)(key) key_hash.update(prn.get_str().encode("utf8")) + update_for_Map = update_for_BasicSet # noqa + def update_for_type(self, key_hash, key): try: method = getattr(self, "update_for_type_"+key.__name__) From 1e6af20937270eb05776e28b9346198411c03f4f Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Mon, 5 Apr 2021 05:49:27 -0500 Subject: [PATCH 242/460] rename append_marker_to_isl_map_var_names()->append_mark_to_isl_map_var_names(); rename append_marker_to_strings()->append_mark_to_strings(); rename marker->mark; make mark argument required instead of having a default value --- loopy/schedule/checker/utils.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git 
a/loopy/schedule/checker/utils.py b/loopy/schedule/checker/utils.py index 9382d070a..3c3f5184f 100644 --- a/loopy/schedule/checker/utils.py +++ b/loopy/schedule/checker/utils.py @@ -106,8 +106,8 @@ def add_eq_isl_constraint_from_names(isl_map, var1, var2): {1: 0, var1: 1, var2: -1})) -def append_marker_to_isl_map_var_names(old_isl_map, dim_type, marker="'"): - """Return an :class:`islpy.Map` with a marker appended to the specified +def append_mark_to_isl_map_var_names(old_isl_map, dim_type, mark): + """Return an :class:`islpy.Map` with a mark appended to the specified dimension names. :arg old_isl_map: An :class:`islpy.Map`. @@ -115,24 +115,24 @@ def append_marker_to_isl_map_var_names(old_isl_map, dim_type, marker="'"): :arg dim_type: An :class:`islpy.dim_type`, i.e., an :class:`int`, specifying the dimension to be marked. - :arg marker: A :class:`str` to be appended to the specified dimension - names. If not provided, `marker` defaults to an apostrophe. + :arg mark: A :class:`str` to be appended to the specified dimension + names. If not provided, `mark` defaults to an apostrophe. :returns: An :class:`islpy.Map` matching `old_isl_map` with - `marker` appended to the `dim_type` dimension names. + `mark` appended to the `dim_type` dimension names. 
""" new_map = old_isl_map.copy() for i in range(len(old_isl_map.get_var_names(dim_type))): new_map = new_map.set_dim_name(dim_type, i, old_isl_map.get_dim_name( - dim_type, i)+marker) + dim_type, i)+mark) return new_map -def append_marker_to_strings(strings, marker="'"): +def append_mark_to_strings(strings, mark): assert isinstance(strings, list) - return [s+marker for s in strings] + return [s+mark for s in strings] def sorted_union_of_names_in_isl_sets( From 70aa8fc6f6595b90e67e53988f21699bebea29c5 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Mon, 5 Apr 2021 05:54:34 -0500 Subject: [PATCH 243/460] (includes function signature changes) reorder args in get_lex_order_set(); remove redundant n_dims arg from create_lex_order_map(); rename marker->mark; make make mark argument required instead of having a default value --- .../checker/lexicographic_order_map.py | 56 ++++++++----------- 1 file changed, 23 insertions(+), 33 deletions(-) diff --git a/loopy/schedule/checker/lexicographic_order_map.py b/loopy/schedule/checker/lexicographic_order_map.py index 20f889975..0a01f888c 100644 --- a/loopy/schedule/checker/lexicographic_order_map.py +++ b/loopy/schedule/checker/lexicographic_order_map.py @@ -25,7 +25,7 @@ def get_statement_ordering_map( - sched_before, sched_after, lex_map, before_marker="'"): + sched_before, sched_after, lex_map, before_mark): """Return a statement ordering represented as a map from each statement instance to all statement instances occurring later. @@ -47,7 +47,7 @@ def get_statement_ordering_map( i0' < i0 or (i0' = i0 and i1' < i1) or (i0' = i0 and i1' = i1 and i2' < i2) ...} - :arg before_marker: A :class:`str` to be appended to the names of the + :arg before_mark: A :class:`str` to be appended to the names of the map dimensions representing the 'before' statement in the 'happens before' relationship. 
@@ -63,18 +63,18 @@ def get_statement_ordering_map( sio = sched_before.apply_range( lex_map).apply_range(sched_after.reverse()) - # Append marker to in_ dims + # Append mark to in_ dims from loopy.schedule.checker.utils import ( - append_marker_to_isl_map_var_names, + append_mark_to_isl_map_var_names, ) - return append_marker_to_isl_map_var_names( - sio, isl.dim_type.in_, before_marker) + return append_mark_to_isl_map_var_names( + sio, isl.dim_type.in_, before_mark) def get_lex_order_set( dim_names, + in_dim_mark, islvars=None, - in_dim_marker="'", ): """Return an :class:`islpy.Set` representing a lexicographic ordering over a space with the number of dimensions provided in `dim_names` @@ -85,26 +85,26 @@ def get_lex_order_set( to describe lexicographic space dimensions for a point in a lexicographic ordering. (see example below) + :arg in_dim_mark: A :class:`str` to be appended to dimension names to + distinguish corresponding dimensions in before-after pairs of points. + (see example below) + :arg islvars: A dictionary mapping variable names in `dim_names` to :class:`islpy.PwAff` instances that represent each of the variables (islvars may be produced by `islpy.make_zero_and_vars`). The key '0' is also include and represents a :class:`islpy.PwAff` zero constant. This dictionary defines the space to be used for the set and - must also include versions of `dim_names` with the `in_dim_marker` + must also include versions of `dim_names` with the `in_dim_mark` appended. If no value is passed, the dictionary will be made using - `dim_names` and `dim_names` with the `in_dim_marker` appended. - - :arg in_dim_marker: A :class:`str` to be appended to dimension names to - distinguish corresponding dimensions in before-after pairs of points. - (see example below) + `dim_names` and `dim_names` with the `in_dim_mark` appended. :returns: An :class:`islpy.Set` representing a big-endian lexicographic ordering with the number of dimensions provided in `dim_names`. 
The set has two dimensions for each name in `dim_names`, one identified by the - given name and another identified by the same name with `in_dim_marker` + given name and another identified by the same name with `in_dim_mark` appended. The set contains all points which meet a 'happens before' constraint defining the lexicographic ordering. E.g., if - `dim_names = [i0, i1, i2]` and `in_dim_marker="'"`, + `dim_names = [i0, i1, i2]` and `in_dim_mark="'"`, return the set containing all points in a 3-dimensional, big-endian lexicographic ordering such that point `[i0', i1', i2']` happens before `[i0, i1, i2]`. I.e., return:: @@ -116,10 +116,10 @@ def get_lex_order_set( """ from loopy.schedule.checker.utils import ( - append_marker_to_strings, + append_mark_to_strings, ) - in_dim_names = append_marker_to_strings(dim_names, marker=in_dim_marker) + in_dim_names = append_mark_to_strings(dim_names, mark=in_dim_mark) # If no islvars passed, make them using the names provided # (make sure to pass var names in desired order of space dims) @@ -156,21 +156,16 @@ def get_lex_order_set( def create_lex_order_map( - n_dims=None, - dim_names=None, - in_dim_marker="'", + dim_names, + in_dim_mark, ): """Return a map from each point in a lexicographic ordering to every point that occurs later in the lexicographic ordering. - :arg n_dims: An :class:`int` representing the number of dimensions - in the lexicographic ordering. If not provided, `n_dims` will be - set to length of `dim_names`. - :arg dim_names: A list of :class:`str` variable names for the lexicographic space dimensions. - :arg in_dim_marker: A :class:`str` to be appended to `dim_names` to create + :arg in_dim_mark: A :class:`str` to be appended to `dim_names` to create the names for the input dimensions of the map, thereby distinguishing them from the corresponding output dimensions in before-after pairs of points. 
(see example below) @@ -178,7 +173,7 @@ def create_lex_order_map( :returns: An :class:`islpy.Map` representing a lexicographic ordering as a mapping from each point in lexicographic time to every point that occurs later in lexicographic time. - E.g., if `dim_names = [i0, i1, i2]` and `in_dim_marker = "'"`, + E.g., if `dim_names = [i0, i1, i2]` and `in_dim_mark = "'"`, return the map:: {[i0', i1', i2'] -> [i0, i1, i2] : @@ -187,18 +182,13 @@ def create_lex_order_map( """ - if dim_names is None: - dim_names = ["i%s" % (i) for i in range(n_dims)] - if n_dims is None: - n_dims = len(dim_names) - - assert len(dim_names) == n_dims + n_dims = len(dim_names) dim_type = isl.dim_type # First, get a set representing the lexicographic ordering. lex_order_set = get_lex_order_set( dim_names, - in_dim_marker=in_dim_marker, + in_dim_mark=in_dim_mark, ) # Now convert that set to a map. From 2ca1a8e383569e2c97049ed55a62f883826a24bc Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Mon, 5 Apr 2021 05:55:27 -0500 Subject: [PATCH 244/460] rename marker->mark and pass mark into funcs where it is now required --- loopy/schedule/checker/schedule.py | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/loopy/schedule/checker/schedule.py b/loopy/schedule/checker/schedule.py index 6b509f694..d9029bfdf 100644 --- a/loopy/schedule/checker/schedule.py +++ b/loopy/schedule/checker/schedule.py @@ -246,7 +246,7 @@ def get_pairwise_statement_orderings_inner( ) from loopy.schedule.checker.utils import ( add_and_name_isl_dims, - append_marker_to_strings, + append_mark_to_strings, add_eq_isl_constraint_from_names, sorted_union_of_names_in_isl_sets, create_symbolic_map_from_tuples, @@ -556,20 +556,21 @@ def _gather_blex_ordering_info(sync_kind): # Create names for the blex dimensions for sequential loops seq_blex_dim_names = [ LEX_VAR_PREFIX+str(i) for i in range(n_seq_blex_dims)] - seq_blex_dim_names_prime = append_marker_to_strings( - seq_blex_dim_names, 
marker=BEFORE_MARK) + seq_blex_dim_names_prime = append_mark_to_strings( + seq_blex_dim_names, mark=BEFORE_MARK) # Begin with the blex order map created as a standard lexicographical order blex_order_map = create_lex_order_map( dim_names=seq_blex_dim_names, - in_dim_marker=BEFORE_MARK, + in_dim_mark=BEFORE_MARK, ) # Add LID/GID dims to blex order map blex_order_map = add_and_name_isl_dims( blex_order_map, dt.out, all_par_lex_dim_names) blex_order_map = add_and_name_isl_dims( - blex_order_map, dt.in_, append_marker_to_strings(all_par_lex_dim_names)) + blex_order_map, dt.in_, + append_mark_to_strings(all_par_lex_dim_names, mark=BEFORE_MARK)) if sync_kind == "local": # For intra-group case, constrain GID 'before' to equal GID 'after' for var_name in gid_lex_dim_names: @@ -628,7 +629,7 @@ def _create_blex_set_from_tuple_pair(before, after, wrap_cond=False): # Start with a set representing blex_order_map space blex_set = blex_set_template.copy() - # Add markers to inames in the 'before' tuple + # Add marks to inames in the 'before' tuple # (all strings should be inames) before_prime = tuple( v+BEFORE_MARK if isinstance(v, str) else v for v in before) @@ -845,14 +846,15 @@ def _get_map_for_stmt( # Create pairwise lex order map (pairwise only in the intra-thread case) lex_order_map = create_lex_order_map( dim_names=seq_lex_dim_names, - in_dim_marker=BEFORE_MARK, + in_dim_mark=BEFORE_MARK, ) # Add lid/gid dims to lex order map lex_order_map = add_and_name_isl_dims( lex_order_map, dt.out, all_par_lex_dim_names) lex_order_map = add_and_name_isl_dims( - lex_order_map, dt.in_, append_marker_to_strings(all_par_lex_dim_names)) + lex_order_map, dt.in_, + append_mark_to_strings(all_par_lex_dim_names, mark=BEFORE_MARK)) # Constrain lid/gid vars to be equal for var_name in all_par_lex_dim_names: lex_order_map = add_eq_isl_constraint_from_names( @@ -863,7 +865,7 @@ def _get_map_for_stmt( sio_intra_thread = get_statement_ordering_map( *intra_thread_sched_maps, # note, func accepts 
exactly two maps lex_order_map, - before_marker=BEFORE_MARK, + before_mark=BEFORE_MARK, ) # }}} @@ -890,7 +892,7 @@ def _get_sched_maps_and_sio( sio_par = get_statement_ordering_map( *par_sched_maps, # note, func accepts exactly two maps blex_order_map, - before_marker=BEFORE_MARK, + before_mark=BEFORE_MARK, ) return par_sched_maps, sio_par From 2b8156e915fac9a379772119bd42e0012cfae3af Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Mon, 5 Apr 2021 05:59:29 -0500 Subject: [PATCH 245/460] for legibility of tests, allow test map strings to use apostrophe as a placeholder for the before-mark even when BEFORE_MARK is set to something else (by replacing the placeholder with BEFORE_MARK in the strings before creating the maps) --- test/test_linearization_checker.py | 27 ++++++++++++++++++++------- 1 file changed, 20 insertions(+), 7 deletions(-) diff --git a/test/test_linearization_checker.py b/test/test_linearization_checker.py index 7c2272c82..d61925414 100644 --- a/test/test_linearization_checker.py +++ b/test/test_linearization_checker.py @@ -80,13 +80,24 @@ def _lex_point_string(dim_vals, lid_inames=[], gid_inames=[]): ) -def _isl_map_with_marked_dims(s): +def _isl_map_with_marked_dims(s, placeholder_mark="'"): + # For creating legible tests, map strings may be created with a placeholder + # for the 'before' mark. Replace this placeholder with BEFORE_MARK before + # creating the map. + # ALSO, if BEFORE_MARK == "'", ISL will ignore this mark when creating + # variable names, so it must be added manually. 
from loopy.schedule.checker.utils import ( - append_marker_to_isl_map_var_names, + append_mark_to_isl_map_var_names, ) dt = isl.dim_type - # Isl ignores the apostrophes in map strings, until they are explicitly added - return append_marker_to_isl_map_var_names(isl.Map(s), dt.in_, BEFORE_MARK) + if BEFORE_MARK == "'": + # ISL will ignore the apostrophe; manually name the in_ vars + return append_mark_to_isl_map_var_names( + isl.Map(s.replace(placeholder_mark, BEFORE_MARK)), + dt.in_, + BEFORE_MARK) + else: + return isl.Map(s.replace(placeholder_mark, BEFORE_MARK)) def _check_orderings_for_stmt_pair( @@ -455,8 +466,8 @@ def test_lex_order_map_creation(): def _check_lex_map(exp_lex_order_map, n_dims): lex_order_map = create_lex_order_map( - n_dims=n_dims, dim_names=["%s%d" % (LEX_VAR_PREFIX, i) for i in range(n_dims)], + in_dim_mark=BEFORE_MARK, ) assert lex_order_map == exp_lex_order_map @@ -909,7 +920,8 @@ def test_sios_and_schedules_with_barriers(): ij_end_val, conc_iname_bound_str, conc_iname_bound_str_p, - )) + ) + ) wanted_pairs = ensure_dim_names_match_and_align( wanted_pairs, order_info.sio_intra_group) @@ -931,7 +943,8 @@ def test_sios_and_schedules_with_barriers(): ij_end_val, conc_iname_bound_str, conc_iname_bound_str_p, - )) + ) + ) unwanted_pairs = ensure_dim_names_match_and_align( unwanted_pairs, order_info.sio_intra_group) From 50332c37e09cbceff4da7ecdd20a28e2d7bb5bab Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Mon, 5 Apr 2021 06:05:12 -0500 Subject: [PATCH 246/460] change BEFORE_MARK to underscore since ISL ignores apostrophes; now caching works --- loopy/schedule/checker/__init__.py | 4 ++-- loopy/schedule/checker/schedule.py | 2 +- test/test_linearization_checker.py | 5 ----- 3 files changed, 3 insertions(+), 8 deletions(-) diff --git a/loopy/schedule/checker/__init__.py b/loopy/schedule/checker/__init__.py index 82f9cb5fc..5811f3048 100644 --- a/loopy/schedule/checker/__init__.py +++ b/loopy/schedule/checker/__init__.py @@ -85,8 +85,8 @@ def 
get_pairwise_statement_orderings( >>> print(str(sio_dict[("stmt_a", "stmt_b")].sio_intra_thread ... ).replace("{ ", "{\n").replace(" :", "\n:")) [pj, pk] -> { - [_lp_linchk_stmt' = 0, j', k'] -> [_lp_linchk_stmt = 1, j, k] - : 0 <= j' < pj and 0 <= k' < pk and 0 <= j < pj and 0 <= k < pk } + [_lp_linchk_stmt_ = 0, j_, k_] -> [_lp_linchk_stmt = 1, j, k] + : 0 <= j_ < pj and 0 <= k_ < pk and 0 <= j < pj and 0 <= k < pk } """ diff --git a/loopy/schedule/checker/schedule.py b/loopy/schedule/checker/schedule.py index d9029bfdf..afa0b47a2 100644 --- a/loopy/schedule/checker/schedule.py +++ b/loopy/schedule/checker/schedule.py @@ -72,7 +72,7 @@ for par_level in [0, 1, 2]: LTAG_VAR_NAMES.append("%slid%d" % (LIN_CHECK_IDENTIFIER_PREFIX, par_level)) GTAG_VAR_NAMES.append("%sgid%d" % (LIN_CHECK_IDENTIFIER_PREFIX, par_level)) -BEFORE_MARK = "'" +BEFORE_MARK = "_" # }}} diff --git a/test/test_linearization_checker.py b/test/test_linearization_checker.py index 6547bb50e..56854ae94 100644 --- a/test/test_linearization_checker.py +++ b/test/test_linearization_checker.py @@ -1281,11 +1281,6 @@ def test_sios_and_schedules_with_vec_and_barriers(): def test_add_stmt_inst_dependency(): - lp.set_caching_enabled(False) - # TODO REMOVE THIS^ (prevents - # TypeError: unsupported type for persistent hash keying: - # ) during preprocessing - # Make kernel and use OLD deps to linearize correctly for now i_range_str = "0 <= i < pi" i_range_str_p = "0 <= i' < pi" From 31783ee367bfc306a782e9a8e4317c36382dda7e Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Fri, 9 Apr 2021 13:04:49 -0500 Subject: [PATCH 247/460] add todo --- loopy/schedule/checker/schedule.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/loopy/schedule/checker/schedule.py b/loopy/schedule/checker/schedule.py index afa0b47a2..5444495a8 100644 --- a/loopy/schedule/checker/schedule.py +++ b/loopy/schedule/checker/schedule.py @@ -72,7 +72,7 @@ for par_level in [0, 1, 2]: LTAG_VAR_NAMES.append("%slid%d" % 
(LIN_CHECK_IDENTIFIER_PREFIX, par_level)) GTAG_VAR_NAMES.append("%sgid%d" % (LIN_CHECK_IDENTIFIER_PREFIX, par_level)) -BEFORE_MARK = "_" +BEFORE_MARK = "_" # TODO switch back to apostrophe after islpy is updated # }}} From 4a69388d5c5cff574750b34fa68a344ca18fa309 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Fri, 9 Apr 2021 15:19:33 -0500 Subject: [PATCH 248/460] add map_stmt_inst_dependencies() --- loopy/transform/instruction.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/loopy/transform/instruction.py b/loopy/transform/instruction.py index b1e9cb7a5..0918f40fd 100644 --- a/loopy/transform/instruction.py +++ b/loopy/transform/instruction.py @@ -150,6 +150,22 @@ def _add_dep(stmt): return result + +# {{{ map_stmt_inst_dependencies + +def map_stmt_inst_dependencies(kernel, stmt_match, f): + + def _update_dep(stmt): + new_deps = {} + for dep_id, deps in stmt.dependencies.items(): + new_deps[dep_id] = [f(dep) for dep in deps] + + return stmt.copy(dependencies=new_deps) + + return map_instructions(kernel, stmt_match, _update_dep) + +# }}} + # }}} From 9b71b55e6c14955c86bb67a9e8a21df0b441d786 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Fri, 9 Apr 2021 15:20:29 -0500 Subject: [PATCH 249/460] (WIP) split inames in dependencies when splitting inames; also remove inames from dependencies when removing unused inames --- loopy/transform/iname.py | 70 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 70 insertions(+) diff --git a/loopy/transform/iname.py b/loopy/transform/iname.py index 590e17afd..835813c40 100644 --- a/loopy/transform/iname.py +++ b/loopy/transform/iname.py @@ -254,6 +254,50 @@ def _split_iname_backend(kernel, iname_to_split, _split_iname_in_set(dom, iname_to_split, inner_iname, outer_iname, fixed_length, fixed_length_is_inner) for dom in kernel.domains] + from loopy.transform.instruction import map_stmt_inst_dependencies + + # {{{ split iname in deps + + from loopy.schedule.checker.schedule import BEFORE_MARK + def 
_split_iname_in_dep(dep): + + # Temporarily convert map to set for processing + # (TODO make generic func for this) + dt = isl.dim_type + n_in_dims = len(dep.get_var_names(dt.in_)) + n_out_dims = len(dep.get_var_names(dt.out)) + set_from_map = dep.move_dims(dt.in_, n_in_dims, dt.out, 0, n_out_dims).domain() + + # Split iname + s = _split_iname_in_set( + set_from_map, + iname_to_split, + inner_iname, + outer_iname, + fixed_length, + fixed_length_is_inner) + s = _split_iname_in_set( + s, + iname_to_split+BEFORE_MARK, + inner_iname+BEFORE_MARK, + outer_iname+BEFORE_MARK, + fixed_length, + fixed_length_is_inner) + + # now set looks like + # [... in_dims ..., ... out dims ..., i_outer, i_inner, i_outer', i_inner'] + + # Convert set back to map + map_from_set = isl.Map.from_domain(s) + # move original out dims + 2 new dims: + map_from_set = map_from_set.move_dims( + dt.out, 0, dt.in_, n_in_dims, n_out_dims+2) + + return map_from_set + + kernel = map_stmt_inst_dependencies(kernel, "id:*", _split_iname_in_dep) + + # }}} from pymbolic import var inner = var(inner_iname) @@ -1168,6 +1212,15 @@ def get_used_inames(kernel): return used_inames +def remove_var_from_set(s, var): + try: + dt, idx = s.get_var_dict()[var] + except KeyError: + return s + else: + return s.project_out(dt, idx, 1) + + def remove_unused_inames(kernel, inames=None): """Delete those among *inames* that are unused, i.e. project them out of the domain. 
If these inames pose implicit restrictions on @@ -1199,6 +1252,7 @@ def remove_unused_inames(kernel, inames=None): new_domains = [] for dom in domains: + """ try: dt, idx = dom.get_var_dict()[iname] except KeyError: @@ -1206,6 +1260,8 @@ def remove_unused_inames(kernel, inames=None): else: dom = dom.project_out(dt, idx, 1) new_domains.append(dom) + """ + new_domains.append(remove_var_from_set(dom, iname)) domains = new_domains @@ -1213,6 +1269,20 @@ def remove_unused_inames(kernel, inames=None): # }}} + # {{{ remove iname from deps + + from loopy.transform.instruction import map_stmt_inst_dependencies + from loopy.schedule.checker.schedule import BEFORE_MARK + def _remove_iname_from_dep(dep): + pu.db + return remove_var_from_set( + remove_var_from_set(dep, iname), iname+BEFORE_MARK) + + pu.db + kernel = map_stmt_inst_dependencies(kernel, "id:*", _remove_iname_from_dep) + + # }}} + return kernel From 6504ae2a924e927dbad3bdc7ef7c7ed2fdf14fd8 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Sat, 10 Apr 2021 18:29:55 -0500 Subject: [PATCH 250/460] don't assert that arg is list in append_mark_to_strings() --- loopy/schedule/checker/utils.py | 1 - 1 file changed, 1 deletion(-) diff --git a/loopy/schedule/checker/utils.py b/loopy/schedule/checker/utils.py index 3c3f5184f..0c2ef4991 100644 --- a/loopy/schedule/checker/utils.py +++ b/loopy/schedule/checker/utils.py @@ -131,7 +131,6 @@ def append_mark_to_isl_map_var_names(old_isl_map, dim_type, mark): def append_mark_to_strings(strings, mark): - assert isinstance(strings, list) return [s+mark for s in strings] From 9baf9fcf7abe37c4a1ead87b60fb1cbf1604037d Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Sat, 10 Apr 2021 20:31:44 -0500 Subject: [PATCH 251/460] create convert_map_to_set() function; set dt=isl.dim_type to make code prettier --- loopy/schedule/checker/utils.py | 33 ++++++++++++++++++++------------- 1 file changed, 20 insertions(+), 13 deletions(-) diff --git a/loopy/schedule/checker/utils.py 
b/loopy/schedule/checker/utils.py index 0c2ef4991..8f72596b6 100644 --- a/loopy/schedule/checker/utils.py +++ b/loopy/schedule/checker/utils.py @@ -21,6 +21,7 @@ """ import islpy as isl +dt = isl.dim_type def prettier_map_string(map_obj): @@ -62,10 +63,10 @@ def reorder_dims_by_name( """ - assert dim_type != isl.dim_type.param + assert dim_type != dt.param assert set(isl_set.get_var_names(dim_type)) == set(desired_dims_ordered) - other_dim_type = isl.dim_type.param + other_dim_type = dt.param other_dim_len = len(isl_set.get_var_names(other_dim_type)) new_set = isl_set.copy() @@ -89,7 +90,7 @@ def ensure_dim_names_match_and_align(obj_map, tgt_map): if not all( set(obj_map.get_var_names(dt)) == set(tgt_map.get_var_names(dt)) for dt in - [isl.dim_type.in_, isl.dim_type.out, isl.dim_type.param]): + [dt.in_, dt.out, dt.param]): raise ValueError( "Cannot align spaces; names don't match:\n%s\n%s" % (prettier_map_string(obj_map), prettier_map_string(tgt_map)) @@ -136,7 +137,7 @@ def append_mark_to_strings(strings, mark): def sorted_union_of_names_in_isl_sets( isl_sets, - set_dim=isl.dim_type.set): + set_dim=dt.set): r"""Return a sorted list of the union of all variable names found in the provided :class:`islpy.Set`\ s. 
""" @@ -147,6 +148,14 @@ def sorted_union_of_names_in_isl_sets( return sorted(inames) +def convert_map_to_set(isl_map): + n_in_dims = len(isl_map.get_var_names(dt.in_)) + n_out_dims = len(isl_map.get_var_names(dt.out)) + return isl_map.move_dims( + dt.in_, n_in_dims, dt.out, 0, n_out_dims + ).domain(), n_in_dims, n_out_dims + + def create_symbolic_map_from_tuples( tuple_pairs_with_domains, space, @@ -175,16 +184,14 @@ def create_symbolic_map_from_tuples( """ # TODO allow None for domains - dim_type = isl.dim_type - - space_out_names = space.get_var_names(dim_type.out) - space_in_names = space.get_var_names(isl.dim_type.in_) + space_out_names = space.get_var_names(dt.out) + space_in_names = space.get_var_names(dt.in_) # Get islvars from space islvars = isl.affs_from_space( space.move_dims( - isl.dim_type.out, 0, - isl.dim_type.in_, 0, + dt.out, 0, + dt.in_, 0, len(space_in_names), ).range() ) @@ -204,7 +211,7 @@ def _conjunction_of_dim_eq_conditions(dim_names, values, islvars): union_of_maps = isl.Map.from_domain( islvars[0].eq_set(islvars[0]+1) # 0 == 1 (false) ).move_dims( - dim_type.out, 0, dim_type.in_, len(space_in_names), len(space_out_names)) + dt.out, 0, dt.in_, len(space_in_names), len(space_out_names)) # Loop through tuple pairs for (tup_in, tup_out), dom in tuple_pairs_with_domains: @@ -220,13 +227,13 @@ def _conjunction_of_dim_eq_conditions(dim_names, values, islvars): # Convert set to map by moving dimensions around map_from_set = isl.Map.from_domain(condition) map_from_set = map_from_set.move_dims( - dim_type.out, 0, dim_type.in_, + dt.out, 0, dt.in_, len(space_in_names), len(space_out_names)) # Align the *out* dims of dom with the space *in_* dims # in preparation for intersection dom_with_set_dim_aligned = reorder_dims_by_name( - dom, isl.dim_type.set, + dom, dt.set, space_in_names, ) From 2c2eeb031f514ada6eb7626e4cabfad34893be84 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Sat, 10 Apr 2021 20:34:58 -0500 Subject: [PATCH 252/460] change 
remove_var_from_set() to remove_vars_from_set() and remove multiple vars at once; don't try to split inames in dependencies that don't contain that iname --- loopy/transform/iname.py | 64 +++++++++++++++++++--------------------- 1 file changed, 30 insertions(+), 34 deletions(-) diff --git a/loopy/transform/iname.py b/loopy/transform/iname.py index 835813c40..4abddd273 100644 --- a/loopy/transform/iname.py +++ b/loopy/transform/iname.py @@ -259,14 +259,19 @@ def _split_iname_backend(kernel, iname_to_split, # {{{ split iname in deps from loopy.schedule.checker.schedule import BEFORE_MARK + from loopy.schedule.checker.utils import convert_map_to_set + def _split_iname_in_dep(dep): # Temporarily convert map to set for processing # (TODO make generic func for this) dt = isl.dim_type - n_in_dims = len(dep.get_var_names(dt.in_)) - n_out_dims = len(dep.get_var_names(dt.out)) - set_from_map = dep.move_dims(dt.in_, n_in_dims, dt.out, 0, n_out_dims).domain() + + # If iname is not present in dep, return unmodified dep + if iname_to_split not in dep.get_var_names(dt.out): + return dep + + set_from_map, n_in_dims, n_out_dims = convert_map_to_set(dep) # Split iname s = _split_iname_in_set( @@ -285,7 +290,7 @@ def _split_iname_in_dep(dep): fixed_length_is_inner) # now set looks like - # [... in_dims ..., ... 
out dims ..., i_outer, i_inner, i_outer', i_inner'] + # [old_inames' ..., old_inames ..., i_outer, i_inner, i_outer', i_inner'] # Convert set back to map map_from_set = isl.Map.from_domain(s) @@ -1212,13 +1217,17 @@ def get_used_inames(kernel): return used_inames -def remove_var_from_set(s, var): - try: - dt, idx = s.get_var_dict()[var] - except KeyError: - return s - else: - return s.project_out(dt, idx, 1) +def remove_vars_from_set(s, remove_vars): + from copy import deepcopy + new_s = deepcopy(s) + for var in remove_vars: + try: + dt, idx = s.get_var_dict()[var] + except KeyError: + continue + else: + new_s = new_s.project_out(dt, idx, 1) + return new_s def remove_unused_inames(kernel, inames=None): @@ -1247,38 +1256,25 @@ def remove_unused_inames(kernel, inames=None): # {{{ remove them - domains = kernel.domains - for iname in unused_inames: - new_domains = [] - - for dom in domains: - """ - try: - dt, idx = dom.get_var_dict()[iname] - except KeyError: - pass - else: - dom = dom.project_out(dt, idx, 1) - new_domains.append(dom) - """ - new_domains.append(remove_var_from_set(dom, iname)) - - domains = new_domains + new_domains = [] + for dom in kernel.domains: + new_domains.append(remove_vars_from_set(dom, unused_inames)) - kernel = kernel.copy(domains=domains) + kernel = kernel.copy(domains=new_domains) # }}} - # {{{ remove iname from deps + # {{{ remove inames from deps from loopy.transform.instruction import map_stmt_inst_dependencies from loopy.schedule.checker.schedule import BEFORE_MARK + from loopy.schedule.checker.utils import append_mark_to_strings + unused_inames_marked = append_mark_to_strings(unused_inames, BEFORE_MARK) + def _remove_iname_from_dep(dep): - pu.db - return remove_var_from_set( - remove_var_from_set(dep, iname), iname+BEFORE_MARK) + return remove_vars_from_set( + remove_vars_from_set(dep, unused_inames), unused_inames_marked) - pu.db kernel = map_stmt_inst_dependencies(kernel, "id:*", _remove_iname_from_dep) # }}} From 
53dd6d31f002d6e78d0fb3a880dd74c4c46f7b9b Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Sat, 10 Apr 2021 20:35:18 -0500 Subject: [PATCH 253/460] add test for dependency handling during split_iname --- test/test_linearization_checker.py | 135 +++++++++++++++++++++++++++++ 1 file changed, 135 insertions(+) diff --git a/test/test_linearization_checker.py b/test/test_linearization_checker.py index 56854ae94..0d8d75fbe 100644 --- a/test/test_linearization_checker.py +++ b/test/test_linearization_checker.py @@ -1391,6 +1391,141 @@ def test_add_stmt_inst_dependency(): # TODO create more kernels with valid/invalid linearizations to test checker +# {{{ Check dependency handling during transformations + +def test_split_iname_with_dependencies(): + knl = lp.make_kernel( + "{[i]: 0<=i { %s : 0 <= i < p and i' = i }" + % (dep_inout_space_str)) + + knl = lp.add_stmt_inst_dependency(knl, "stmt1", "stmt0", dep) + knl = lp.split_iname(knl, "i", 32) + + dep_exp = _isl_map_with_marked_dims( + "[p] -> {{ [{0}'=0, i_outer', i_inner'] -> [{0}=1, i_outer, i_inner] : " + "0 <= i_inner, i_inner' < 32" # new bounds + " and 0 <= 32*i_outer + i_inner < p" # transformed bounds (0 <= i < p) + " and 0 <= 32*i_outer' + i_inner' < p" # transformed bounds (0 <= i' < p) + " and i_inner + 32*i_outer = 32*i_outer' + i_inner'" # i = i' + "}}".format(STATEMENT_VAR_NAME)) + dep_found = knl.id_to_insn["stmt1"].dependencies['stmt0'][0] + + # make sure dep is correct + _align_and_compare_maps([(dep_exp, dep_found)]) + + # Get a linearization + proc_knl = preprocess_kernel(knl) + lin_knl = get_one_linearized_kernel(proc_knl) + lin_items = lin_knl.linearization + + unsatisfied_deps = lp.find_unsatisfied_dependencies( + proc_knl, lin_items) + + assert not unsatisfied_deps + + # }}} + + # {{{ dep that should not be satisfied + + knl = ref_knl + + dep_unsatisfied = _isl_map_with_marked_dims( + "[p] -> { %s : 0 <= i < p and i' = i + 1 }" + % (dep_inout_space_str)) + + knl = lp.add_stmt_inst_dependency(knl, 
"stmt1", "stmt0", dep_unsatisfied) + knl = lp.split_iname(knl, "i", 32) + + # Get a linearization + proc_knl = preprocess_kernel(knl) + lin_knl = get_one_linearized_kernel(proc_knl) + lin_items = lin_knl.linearization + + unsatisfied_deps = lp.find_unsatisfied_dependencies( + proc_knl, lin_items) + + assert len(unsatisfied_deps) == 1 + + # }}} + + # {{{ more deps that should be satisfied + + knl = lp.make_kernel( + ["{[i,j]: 0<=i,j { %s : %s and i' = i and k' = k}" + % (dep_inout_space_str, iname_bounds_str)) + dep2 = _isl_map_with_marked_dims( + "[p] -> { %s : %s and i' < i and k' < k}" + % (dep_inout_space_str, iname_bounds_str)) + dep3 = _isl_map_with_marked_dims( + "[p] -> { %s : %s and i' = i and k' = k and j' = j and m' = m}" + % (dep_inout_space_str, iname_bounds_str)) + + knl = lp.add_stmt_inst_dependency(knl, "stmt1", "stmt0", dep1) + knl = lp.add_stmt_inst_dependency(knl, "stmt1", "stmt0", dep2) + knl = lp.add_stmt_inst_dependency(knl, "stmt3", "stmt2", dep3) + + # Gratuitous splitting + knl = lp.split_iname(knl, "i", 64) + knl = lp.split_iname(knl, "j", 64) + knl = lp.split_iname(knl, "k", 64) + knl = lp.split_iname(knl, "m", 64) + knl = lp.split_iname(knl, "i_inner", 8) + knl = lp.split_iname(knl, "j_inner", 8) + knl = lp.split_iname(knl, "k_inner", 8) + knl = lp.split_iname(knl, "m_inner", 8) + knl = lp.split_iname(knl, "i_outer", 4) + knl = lp.split_iname(knl, "j_outer", 4) + knl = lp.split_iname(knl, "k_outer", 4) + knl = lp.split_iname(knl, "m_outer", 4) + + # Get a linearization + proc_knl = preprocess_kernel(knl) + lin_knl = get_one_linearized_kernel(proc_knl) + lin_items = lin_knl.linearization + + unsatisfied_deps = lp.find_unsatisfied_dependencies( + proc_knl, lin_items) + + assert not unsatisfied_deps + + # }}} + +# }}} + + if __name__ == "__main__": if len(sys.argv) > 1: exec(sys.argv[1]) From e9e6bc5256e276e0ad25522846aa0141e821e410 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Sat, 10 Apr 2021 20:42:37 -0500 Subject: [PATCH 254/460] fix 
flake8 issue --- test/test_linearization_checker.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/test_linearization_checker.py b/test/test_linearization_checker.py index 0d8d75fbe..eda2efbd1 100644 --- a/test/test_linearization_checker.py +++ b/test/test_linearization_checker.py @@ -1425,7 +1425,7 @@ def test_split_iname_with_dependencies(): " and 0 <= 32*i_outer' + i_inner' < p" # transformed bounds (0 <= i' < p) " and i_inner + 32*i_outer = 32*i_outer' + i_inner'" # i = i' "}}".format(STATEMENT_VAR_NAME)) - dep_found = knl.id_to_insn["stmt1"].dependencies['stmt0'][0] + dep_found = knl.id_to_insn["stmt1"].dependencies["stmt0"][0] # make sure dep is correct _align_and_compare_maps([(dep_exp, dep_found)]) From d04d1b85d741472311e419ce30585769e0a45e52 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Sat, 10 Apr 2021 20:46:31 -0500 Subject: [PATCH 255/460] set dt=isl.dim_type to make code prettier --- loopy/schedule/checker/utils.py | 25 ++++++++++++------------- 1 file changed, 12 insertions(+), 13 deletions(-) diff --git a/loopy/schedule/checker/utils.py b/loopy/schedule/checker/utils.py index 3c3f5184f..401fd477a 100644 --- a/loopy/schedule/checker/utils.py +++ b/loopy/schedule/checker/utils.py @@ -21,6 +21,7 @@ """ import islpy as isl +dt = isl.dim_type def prettier_map_string(map_obj): @@ -62,10 +63,10 @@ def reorder_dims_by_name( """ - assert dim_type != isl.dim_type.param + assert dim_type != dt.param assert set(isl_set.get_var_names(dim_type)) == set(desired_dims_ordered) - other_dim_type = isl.dim_type.param + other_dim_type = dt.param other_dim_len = len(isl_set.get_var_names(other_dim_type)) new_set = isl_set.copy() @@ -89,7 +90,7 @@ def ensure_dim_names_match_and_align(obj_map, tgt_map): if not all( set(obj_map.get_var_names(dt)) == set(tgt_map.get_var_names(dt)) for dt in - [isl.dim_type.in_, isl.dim_type.out, isl.dim_type.param]): + [dt.in_, dt.out, dt.param]): raise ValueError( "Cannot align spaces; names don't match:\n%s\n%s" % 
(prettier_map_string(obj_map), prettier_map_string(tgt_map)) @@ -137,7 +138,7 @@ def append_mark_to_strings(strings, mark): def sorted_union_of_names_in_isl_sets( isl_sets, - set_dim=isl.dim_type.set): + set_dim=dt.set): r"""Return a sorted list of the union of all variable names found in the provided :class:`islpy.Set`\ s. """ @@ -176,16 +177,14 @@ def create_symbolic_map_from_tuples( """ # TODO allow None for domains - dim_type = isl.dim_type - - space_out_names = space.get_var_names(dim_type.out) - space_in_names = space.get_var_names(isl.dim_type.in_) + space_out_names = space.get_var_names(dt.out) + space_in_names = space.get_var_names(dt.in_) # Get islvars from space islvars = isl.affs_from_space( space.move_dims( - isl.dim_type.out, 0, - isl.dim_type.in_, 0, + dt.out, 0, + dt.in_, 0, len(space_in_names), ).range() ) @@ -205,7 +204,7 @@ def _conjunction_of_dim_eq_conditions(dim_names, values, islvars): union_of_maps = isl.Map.from_domain( islvars[0].eq_set(islvars[0]+1) # 0 == 1 (false) ).move_dims( - dim_type.out, 0, dim_type.in_, len(space_in_names), len(space_out_names)) + dt.out, 0, dt.in_, len(space_in_names), len(space_out_names)) # Loop through tuple pairs for (tup_in, tup_out), dom in tuple_pairs_with_domains: @@ -221,13 +220,13 @@ def _conjunction_of_dim_eq_conditions(dim_names, values, islvars): # Convert set to map by moving dimensions around map_from_set = isl.Map.from_domain(condition) map_from_set = map_from_set.move_dims( - dim_type.out, 0, dim_type.in_, + dt.out, 0, dt.in_, len(space_in_names), len(space_out_names)) # Align the *out* dims of dom with the space *in_* dims # in preparation for intersection dom_with_set_dim_aligned = reorder_dims_by_name( - dom, isl.dim_type.set, + dom, dt.set, space_in_names, ) From 774f430f5ce22e291cfd55b6d9a1999dfaf0411f Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Sat, 10 Apr 2021 22:57:37 -0500 Subject: [PATCH 256/460] minor code/comment cleanup realted to dep handling in split_iname --- 
loopy/transform/iname.py | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/loopy/transform/iname.py b/loopy/transform/iname.py index 4abddd273..90e4b79f3 100644 --- a/loopy/transform/iname.py +++ b/loopy/transform/iname.py @@ -256,44 +256,43 @@ def _split_iname_backend(kernel, iname_to_split, for dom in kernel.domains] from loopy.transform.instruction import map_stmt_inst_dependencies - # {{{ split iname in deps + # {{{ Split iname in dependencies from loopy.schedule.checker.schedule import BEFORE_MARK from loopy.schedule.checker.utils import convert_map_to_set def _split_iname_in_dep(dep): - - # Temporarily convert map to set for processing - # (TODO make generic func for this) dt = isl.dim_type # If iname is not present in dep, return unmodified dep if iname_to_split not in dep.get_var_names(dt.out): return dep + # Temporarily convert map to set for processing set_from_map, n_in_dims, n_out_dims = convert_map_to_set(dep) # Split iname - s = _split_iname_in_set( + set_from_map = _split_iname_in_set( set_from_map, iname_to_split, inner_iname, outer_iname, fixed_length, fixed_length_is_inner) - s = _split_iname_in_set( - s, + # Split iname' + set_from_map = _split_iname_in_set( + set_from_map, iname_to_split+BEFORE_MARK, inner_iname+BEFORE_MARK, outer_iname+BEFORE_MARK, fixed_length, fixed_length_is_inner) - # now set looks like + # Now set dims look like # [old_inames' ..., old_inames ..., i_outer, i_inner, i_outer', i_inner'] # Convert set back to map - map_from_set = isl.Map.from_domain(s) + map_from_set = isl.Map.from_domain(set_from_map) # move original out dims + 2 new dims: map_from_set = map_from_set.move_dims( dt.out, 0, dt.in_, n_in_dims, n_out_dims+2) @@ -1264,7 +1263,7 @@ def remove_unused_inames(kernel, inames=None): # }}} - # {{{ remove inames from deps + # {{{ Remove inames from deps from loopy.transform.instruction import map_stmt_inst_dependencies from loopy.schedule.checker.schedule import BEFORE_MARK From 
7b396b03be4107e088b646048f4cbd2ad5ad7f0a Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Sun, 11 Apr 2021 02:32:26 -0500 Subject: [PATCH 257/460] rename map_stmt_inst_dependencies()->map_stmt_inst_dependency_maps() (maps individual dep maps); create map_stmt_inst_dependencies() (maps stmt.dependencies, which contains multiple dep maps) --- loopy/transform/instruction.py | 60 ++++++++++++++++++---------------- 1 file changed, 32 insertions(+), 28 deletions(-) diff --git a/loopy/transform/instruction.py b/loopy/transform/instruction.py index 0918f40fd..fc43bf5df 100644 --- a/loopy/transform/instruction.py +++ b/loopy/transform/instruction.py @@ -117,6 +117,33 @@ def add_dep(insn): # }}} +# {{{ map_stmt_inst_dependencies + +def map_stmt_inst_dependencies(kernel, stmt_match, f): + # Set stmt.dependences = f(stmt.dependencies) for stmts matching stmt_match + + def _update_deps(stmt): + new_deps = f(stmt.dependencies) + return stmt.copy(dependencies=new_deps) + + return map_instructions(kernel, stmt_match, _update_deps) + + +def map_stmt_inst_dependency_maps(kernel, stmt_match, f): + # Set map = f(map) for all dep maps in stmt.dependencies.values() + # for statements matching stmt_match + + def _update_dep_map(stmt_deps): + new_deps = {} + for dep_id, deps in stmt_deps.items(): + new_deps[dep_id] = [f(dep) for dep in deps] + return new_deps + + return map_stmt_inst_dependencies(kernel, stmt_match, _update_dep_map) + +# }}} + + # {{{ add_stmt_inst_dependency def add_stmt_inst_dependency( @@ -134,38 +161,15 @@ def add_stmt_inst_dependency( "cannot add dependency %s->%s" % (depends_on_id, depends_on_id, stmt_id)) - matched = [False] - - def _add_dep(stmt): - new_deps_dict = stmt.dependencies # dict mapping depends-on ids to dep maps - matched[0] = True - new_deps_dict.setdefault(depends_on_id, []).append(new_dependency) - return stmt.copy(dependencies=new_deps_dict) + def _add_dep(stmt_deps): + # stmt_deps: dict mapping depends-on ids to dep maps + 
stmt_deps.setdefault(depends_on_id, []).append(new_dependency) + return stmt_deps - result = map_instructions(kernel, "id:%s" % (stmt_id), _add_dep) - - if not matched[0]: # Is this possible, given check above? - raise LoopyError("no instructions found matching '%s' " - "(to which dependencies would be added)" % stmt_id) + result = map_stmt_inst_dependencies(kernel, "id:%s" % (stmt_id), _add_dep) return result - -# {{{ map_stmt_inst_dependencies - -def map_stmt_inst_dependencies(kernel, stmt_match, f): - - def _update_dep(stmt): - new_deps = {} - for dep_id, deps in stmt.dependencies.items(): - new_deps[dep_id] = [f(dep) for dep in deps] - - return stmt.copy(dependencies=new_deps) - - return map_instructions(kernel, stmt_match, _update_dep) - -# }}} - # }}} From c414b8c54018be5ab06b6db04e910d63da54250e Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Sun, 11 Apr 2021 02:33:32 -0500 Subject: [PATCH 258/460] update after renaming map_stmt_inst_dependencies()->map_stmt_inst_dependency_maps() --- loopy/transform/iname.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/loopy/transform/iname.py b/loopy/transform/iname.py index 90e4b79f3..ada152624 100644 --- a/loopy/transform/iname.py +++ b/loopy/transform/iname.py @@ -254,10 +254,10 @@ def _split_iname_backend(kernel, iname_to_split, _split_iname_in_set(dom, iname_to_split, inner_iname, outer_iname, fixed_length, fixed_length_is_inner) for dom in kernel.domains] - from loopy.transform.instruction import map_stmt_inst_dependencies # {{{ Split iname in dependencies + from loopy.transform.instruction import map_stmt_inst_dependency_maps from loopy.schedule.checker.schedule import BEFORE_MARK from loopy.schedule.checker.utils import convert_map_to_set @@ -299,7 +299,7 @@ def _split_iname_in_dep(dep): return map_from_set - kernel = map_stmt_inst_dependencies(kernel, "id:*", _split_iname_in_dep) + kernel = map_stmt_inst_dependency_maps(kernel, "id:*", _split_iname_in_dep) # }}} @@ -1265,7 +1265,7 @@ def 
remove_unused_inames(kernel, inames=None): # {{{ Remove inames from deps - from loopy.transform.instruction import map_stmt_inst_dependencies + from loopy.transform.instruction import map_stmt_inst_dependency_maps from loopy.schedule.checker.schedule import BEFORE_MARK from loopy.schedule.checker.utils import append_mark_to_strings unused_inames_marked = append_mark_to_strings(unused_inames, BEFORE_MARK) @@ -1274,7 +1274,7 @@ def _remove_iname_from_dep(dep): return remove_vars_from_set( remove_vars_from_set(dep, unused_inames), unused_inames_marked) - kernel = map_stmt_inst_dependencies(kernel, "id:*", _remove_iname_from_dep) + kernel = map_stmt_inst_dependency_maps(kernel, "id:*", _remove_iname_from_dep) # }}} From 66a825f5cf635587c33f38050cde57cd9d7bc20b Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Sun, 11 Apr 2021 06:45:49 -0500 Subject: [PATCH 259/460] when removing instructions, remove any new-world dependencies on those instructions (for now) --- loopy/transform/instruction.py | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/loopy/transform/instruction.py b/loopy/transform/instruction.py index fc43bf5df..b9d720261 100644 --- a/loopy/transform/instruction.py +++ b/loopy/transform/instruction.py @@ -208,14 +208,25 @@ def remove_instructions(kernel, insn_ids): for dep_id in depends_on & insn_ids: new_deps = new_deps | id_to_insn[dep_id].depends_on + # {{{ Remove any new-world stmt inst dependencies on removed stmts + + new_dependencies = insn.dependencies + for removed_id in insn_ids: + # TODO propagate these intelligently? 
+ new_dependencies.pop(removed_id, None) + + # }}} + # update no_sync_with new_no_sync_with = frozenset((insn_id, scope) for insn_id, scope in insn.no_sync_with if insn_id not in insn_ids) - new_insns.append( - insn.copy(depends_on=new_deps, no_sync_with=new_no_sync_with)) + new_insns.append(insn.copy( + depends_on=new_deps, + dependencies=new_dependencies, + no_sync_with=new_no_sync_with)) return kernel.copy( instructions=new_insns) From 9aaba11afb0e7d4427ec98647ae9c82c4afd06d1 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Sun, 11 Apr 2021 06:51:47 -0500 Subject: [PATCH 260/460] in assignment_to_subst(), copy the dependencies of the def stmt to all stmts where its subst expr was used --- loopy/transform/subst.py | 62 ++++++++++++++++++++++++++++++++++++---- 1 file changed, 56 insertions(+), 6 deletions(-) diff --git a/loopy/transform/subst.py b/loopy/transform/subst.py index 565c69a49..84caac454 100644 --- a/loopy/transform/subst.py +++ b/loopy/transform/subst.py @@ -211,9 +211,9 @@ def __init__(self, rule_mapping_context, lhs_name, definition_insn_ids, self.definition_insn_id_to_subst_name = {} - self.saw_unmatched_usage_sites = {} + self.unmatched_usage_sites_found = {} for def_id in self.definition_insn_ids: - self.saw_unmatched_usage_sites[def_id] = False + self.unmatched_usage_sites_found[def_id] = set() def get_subst_name(self, def_insn_id): try: @@ -255,7 +255,7 @@ def transform_access(self, index, expn_state): expn_state.kernel, expn_state.instruction, expn_state.stack): - self.saw_unmatched_usage_sites[my_def_id] = True + self.unmatched_usage_sites_found[my_def_id].add[my_insn_id] return None subst_name = self.get_subst_name(my_def_id) @@ -338,6 +338,7 @@ def get_relevant_definition_insn_id(usage_insn_id): return def_id usage_to_definition = {} + definition_to_usage_ids = {} for insn in dep_kernel.instructions: if lhs_name not in insn.read_dependency_names(): @@ -350,11 +351,29 @@ def get_relevant_definition_insn_id(usage_insn_id): % (lhs_name, insn.id)) 
usage_to_definition[insn.id] = def_id + definition_to_usage_ids.setdefault(def_id, set()).add(insn.id) + # Get deps for subst_def statements before any of them get removed + definition_id_to_deps = {} + from copy import deepcopy definition_insn_ids = set() for insn in kernel.instructions: if lhs_name in insn.write_dependency_names(): definition_insn_ids.add(insn.id) + definition_id_to_deps[insn.id] = deepcopy(insn.dependencies) + + # TODO refactor after answering question: + # what's the difference between definition_insn_ids and + # set(usage_to_definition.values())? + if definition_insn_ids != set(usage_to_definition.values()): + print("="*80) + print("Apparently these are not equivalent after all. James was wrong.") + print("definition_insn_ids:") + print(definition_insn_ids) + print("set(usage_to_definition.values()):") + print(set(usage_to_definition.values())) + print("="*80) + assert False # }}} @@ -419,7 +438,7 @@ def get_relevant_definition_insn_id(usage_insn_id): new_args = kernel.args if lhs_name in kernel.temporary_variables: - if not any(tts.saw_unmatched_usage_sites.values()): + if not any(tts.unmatched_usage_sites_found.values()): # All usage sites matched--they're now substitution rules. # We can get rid of the variable. @@ -427,7 +446,7 @@ def get_relevant_definition_insn_id(usage_insn_id): del new_temp_vars[lhs_name] if lhs_name in kernel.arg_dict and not force_retain_argument: - if not any(tts.saw_unmatched_usage_sites.values()): + if not any(tts.unmatched_usage_sites_found.values()): # All usage sites matched--they're now substitution rules. 
# We can get rid of the argument @@ -440,13 +459,44 @@ def get_relevant_definition_insn_id(usage_insn_id): # }}} import loopy as lp + # Remove defs if the subst expression is not still used anywhere kernel = lp.remove_instructions( kernel, { insn_id - for insn_id, still_used in tts.saw_unmatched_usage_sites.items() + for insn_id, still_used in tts.unmatched_usage_sites_found.items() if not still_used}) + # {{{ update dependencies + + from loopy.transform.instruction import map_stmt_inst_dependencies + + # Add dependencies from each subst_def to any statement where its + # LHS was found and the subst was performed + for subst_def_id, subst_usage_ids in definition_to_usage_ids.items(): + + unmatched_usage_ids = tts.unmatched_usage_sites_found[subst_def_id] + matched_usage_ids = subst_usage_ids - unmatched_usage_ids + if matched_usage_ids: + # Create match condition string: + match_any_matched_usage_id = " or ".join( + ["id:%s" % (usage_id) for usage_id in matched_usage_ids]) + + subst_def_deps_dict = definition_id_to_deps[subst_def_id] + + def _add_deps_to_stmt(dep_dict): + # dep_dict: prev dep dict for this stmt + # add the deps + for depends_on_id, dep_list in subst_def_deps_dict.items(): + dep_list_copy = deepcopy(dep_list) + dep_dict.setdefault(depends_on_id, []).extend(dep_list_copy) + return dep_dict + + kernel = map_stmt_inst_dependencies( + kernel, match_any_matched_usage_id, _add_deps_to_stmt) + + # }}} + return kernel.copy( substitutions=new_substs, temporary_variables=new_temp_vars, From 8263edff7f4eeac20a5e1c756f82c5a7e9debfd2 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Sun, 11 Apr 2021 06:52:29 -0500 Subject: [PATCH 261/460] add preliminary test for dep handling in assignment_to_subst() --- test/test_linearization_checker.py | 51 ++++++++++++++++++++++++++++++ 1 file changed, 51 insertions(+) diff --git a/test/test_linearization_checker.py b/test/test_linearization_checker.py index eda2efbd1..3b1f28466 100644 --- a/test/test_linearization_checker.py 
+++ b/test/test_linearization_checker.py @@ -1393,6 +1393,57 @@ def test_add_stmt_inst_dependency(): # {{{ Check dependency handling during transformations +def test_assignment_to_subst_with_dependencies(): + knl = lp.make_kernel( + "{[i]: 0 <= i < n}", + """ + <>temp0 = 0.1*i {id=stmt0} + <>tsq = temp0**2 {id=stmt1,dep=stmt0} + a[i] = 23*tsq + 25*tsq {id=stmt2,dep=stmt1} + <>temp1 = 3*tsq {id=stmt3,dep=stmt1} + <>temp2 = 5.5*i {id=stmt4,dep=stmt1} + """) + + # TODO test where 'within' for subst doesn't match all occurances? + # TODO what if stmt2 depends on <>tsq = b[i-1]**2 and then we do + # assignment to subst? remove i'=i from dep? + # TODO what if stmt3 doesn't have iname i in it? + knl = lp.add_and_infer_dtypes(knl, {"a": np.float32}) + + print("instructions before subst") + for insn in knl.instructions: + print(insn) + + dep_eq = _isl_map_with_marked_dims( + "[n] -> {{ [{0}'=0, i']->[{0}=1, i] : " + "0 <= i,i' < n and i' = i" + "}}".format(STATEMENT_VAR_NAME)) + dep_le = _isl_map_with_marked_dims( + "[n] -> {{ [{0}'=0, i']->[{0}=1, i] : " + "0 <= i,i' < n and i' <= i" + "}}".format(STATEMENT_VAR_NAME)) + + from copy import deepcopy + knl = lp.add_stmt_inst_dependency(knl, "stmt1", "stmt0", deepcopy(dep_le)) + knl = lp.add_stmt_inst_dependency(knl, "stmt2", "stmt1", deepcopy(dep_eq)) + knl = lp.add_stmt_inst_dependency(knl, "stmt3", "stmt1", deepcopy(dep_eq)) + knl = lp.add_stmt_inst_dependency(knl, "stmt4", "stmt1", deepcopy(dep_eq)) + + knl = lp.assignment_to_subst(knl, "tsq") + + for stmt_id in ["stmt2", "stmt3"]: + deps_found = knl.id_to_insn[stmt_id].dependencies + + # Dep on stmt1 should have been removed + assert list(deps_found.keys()) == ["stmt0"] + assert len(deps_found["stmt0"]) == 1 + + # Should now depend on stmt0 + _align_and_compare_maps([(dep_le, deps_found["stmt0"][0])]) + + assert not knl.id_to_insn["stmt4"].dependencies + + def test_split_iname_with_dependencies(): knl = lp.make_kernel( "{[i]: 0<=i Date: Tue, 13 Apr 2021 01:10:24 -0500 
Subject: [PATCH 262/460] fix typo bug --- loopy/transform/subst.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/loopy/transform/subst.py b/loopy/transform/subst.py index 84caac454..e4f1cb6ed 100644 --- a/loopy/transform/subst.py +++ b/loopy/transform/subst.py @@ -255,7 +255,7 @@ def transform_access(self, index, expn_state): expn_state.kernel, expn_state.instruction, expn_state.stack): - self.unmatched_usage_sites_found[my_def_id].add[my_insn_id] + self.unmatched_usage_sites_found[my_def_id].add(my_insn_id) return None subst_name = self.get_subst_name(my_def_id) From 134b034d661b21d4d0f04be0047e6bb99dfdc2aa Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Tue, 13 Apr 2021 01:11:12 -0500 Subject: [PATCH 263/460] test dep handling during assignment_to_subst when 'within' doesn't match all usage sites --- test/test_linearization_checker.py | 61 ++++++++++++++++++++++++++---- 1 file changed, 54 insertions(+), 7 deletions(-) diff --git a/test/test_linearization_checker.py b/test/test_linearization_checker.py index 3b1f28466..f8b683ab0 100644 --- a/test/test_linearization_checker.py +++ b/test/test_linearization_checker.py @@ -1400,20 +1400,15 @@ def test_assignment_to_subst_with_dependencies(): <>temp0 = 0.1*i {id=stmt0} <>tsq = temp0**2 {id=stmt1,dep=stmt0} a[i] = 23*tsq + 25*tsq {id=stmt2,dep=stmt1} - <>temp1 = 3*tsq {id=stmt3,dep=stmt1} - <>temp2 = 5.5*i {id=stmt4,dep=stmt1} + <>temp3 = 3*tsq {id=stmt3,dep=stmt1} + <>temp4 = 5.5*i {id=stmt4,dep=stmt1} """) - # TODO test where 'within' for subst doesn't match all occurances? # TODO what if stmt2 depends on <>tsq = b[i-1]**2 and then we do # assignment to subst? remove i'=i from dep? # TODO what if stmt3 doesn't have iname i in it? 
     knl = lp.add_and_infer_dtypes(knl, {"a": np.float32})
 
-    print("instructions before subst")
-    for insn in knl.instructions:
-        print(insn)
-
     dep_eq = _isl_map_with_marked_dims(
         "[n] -> {{ [{0}'=0, i']->[{0}=1, i] : "
         "0 <= i,i' < n and i' = i"
@@ -1443,6 +1438,58 @@ def test_assignment_to_subst_with_dependencies():
 
     assert not knl.id_to_insn["stmt4"].dependencies
 
+    # Test using 'within' --------------------------------------------------
+
+    knl = lp.make_kernel(
+        "{[i]: 0 <= i < n}",
+        """
+        <>temp0 = 0.1*i  {id=stmt0}
+        <>tsq = temp0**2  {id=stmt1,dep=stmt0}
+        a[i] = 23*tsq + 25*tsq  {id=stmt2,dep=stmt1}
+        <>temp3 = 3*tsq  {id=stmt3,dep=stmt1}
+        <>temp4 = 5.5*i  {id=stmt4,dep=stmt1}
+        <>temp5 = 5.6*tsq*i  {id=stmt5,dep=stmt1}
+        """)
+
+    knl = lp.add_and_infer_dtypes(knl, {"a": np.float32})
+
+    knl = lp.add_stmt_inst_dependency(knl, "stmt1", "stmt0", deepcopy(dep_le))
+    knl = lp.add_stmt_inst_dependency(knl, "stmt2", "stmt1", deepcopy(dep_eq))
+    knl = lp.add_stmt_inst_dependency(knl, "stmt3", "stmt1", deepcopy(dep_eq))
+    knl = lp.add_stmt_inst_dependency(knl, "stmt4", "stmt1", deepcopy(dep_eq))
+    knl = lp.add_stmt_inst_dependency(knl, "stmt5", "stmt1", deepcopy(dep_eq))
+
+    knl = lp.assignment_to_subst(knl, "tsq", within="id:stmt2 or id:stmt3")
+
+    # replacement will not be made in stmt5, so stmt1 will not be removed,
+    # which means no deps will be removed, and the statements where the replacement
+    # *was* made (stmt2 and stmt3) will still receive the deps from stmt1
+
+    for stmt_id in ["stmt2", "stmt3"]:
+        deps_found = knl.id_to_insn[stmt_id].dependencies
+
+        # Dep on stmt1 should NOT have been removed
+        # (for now? 
could maybe do something smarter) + assert set(deps_found.keys()) == set(["stmt0", "stmt1"]) + assert len(deps_found["stmt0"]) == len(deps_found["stmt1"]) == 1 + + # Should now depend on stmt0 + _align_and_compare_maps([(dep_le, deps_found["stmt0"][0])]) + + # Should still depend on stmt1 + _align_and_compare_maps([(dep_eq, deps_found["stmt1"][0])]) + + for stmt_id in ["stmt4", "stmt5"]: + deps_found = knl.id_to_insn[stmt_id].dependencies + + # Dep on stmt1 should NOT have been removed + # (for now? could maybe do something smarter) + assert set(deps_found.keys()) == set(["stmt1"]) + assert len(deps_found["stmt1"]) == 1 + + # Should still depend on stmt1 + _align_and_compare_maps([(dep_eq, deps_found["stmt1"][0])]) + def test_split_iname_with_dependencies(): knl = lp.make_kernel( From 036a117ae5c05e7513a5137dc8601648092cd70a Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Tue, 13 Apr 2021 01:46:40 -0500 Subject: [PATCH 264/460] fix parameter in dependencies --- loopy/transform/parameter.py | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/loopy/transform/parameter.py b/loopy/transform/parameter.py index 62a1209f0..428ed05e9 100644 --- a/loopy/transform/parameter.py +++ b/loopy/transform/parameter.py @@ -87,6 +87,34 @@ def process_set(s): new_domains = [process_set(dom) for dom in kernel.domains] + # {{{ Fix parameter in deps + + from loopy.transform.instruction import map_stmt_inst_dependency_maps + from loopy.schedule.checker.utils import convert_map_to_set + + def _fix_parameter_in_dep(dep): + # For efficiency: could check for param presence first + dt = isl.dim_type + + # Temporarily convert map to set for processing + set_from_map, n_in_dims, n_out_dims = convert_map_to_set(dep) + + # Fix param + set_from_map = process_set(set_from_map) + + # Now set dims look like [inames' ..., inames ...] 
+ # Convert set back to map + map_from_set = isl.Map.from_domain(set_from_map) + # Move original out dims back + map_from_set = map_from_set.move_dims( + dt.out, 0, dt.in_, n_in_dims, n_out_dims) + + return map_from_set + + kernel = map_stmt_inst_dependency_maps(kernel, "id:*", _fix_parameter_in_dep) + + # }}} + from pymbolic.mapper.substitutor import make_subst_func subst_func = make_subst_func({name: value}) From 782400969e3b8a93e486fe3caf5c809f32e0f847 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Tue, 13 Apr 2021 01:48:00 -0500 Subject: [PATCH 265/460] add test for dep handling during fix_parameters --- test/test_linearization_checker.py | 47 ++++++++++++++++++++++++++++-- 1 file changed, 45 insertions(+), 2 deletions(-) diff --git a/test/test_linearization_checker.py b/test/test_linearization_checker.py index f8b683ab0..179a2ac01 100644 --- a/test/test_linearization_checker.py +++ b/test/test_linearization_checker.py @@ -1393,6 +1393,46 @@ def test_add_stmt_inst_dependency(): # {{{ Check dependency handling during transformations +def test_fix_parameters_with_dependencies(): + knl = lp.make_kernel( + "{[i,j]: 0 <= i < n and 0 <= j < m}", + """ + <>temp0 = 0.1*i+j {id=stmt0} + <>tsq = temp0**2+i+j {id=stmt1,dep=stmt0} + a[i,j] = 23*tsq + 25*tsq+j {id=stmt2,dep=stmt1} + """) + + knl = lp.add_and_infer_dtypes(knl, {"a": np.float32}) + + dep_orig = _isl_map_with_marked_dims( + "[n,m] -> {{ [{0}'=0, i', j']->[{0}=1, i, j] : " + "0 <= i,i' < n and 0 <= j,j' < m " + "and i' = i and j' = j" + "}}".format(STATEMENT_VAR_NAME)) + + from copy import deepcopy + knl = lp.add_stmt_inst_dependency(knl, "stmt1", "stmt0", deepcopy(dep_orig)) + knl = lp.add_stmt_inst_dependency(knl, "stmt2", "stmt1", deepcopy(dep_orig)) + + fix_val = 64 + knl = lp.fix_parameters(knl, m=fix_val) + + dep_exp = _isl_map_with_marked_dims( + "[n] -> {{ [{0}'=0, i', j']->[{0}=1, i, j] : " + "0 <= i,i' < n and 0 <= j,j' < {1} " + "and i' = i and j' = j" + "}}".format(STATEMENT_VAR_NAME, fix_val)) + + 
for stmt_id, dep_id in [("stmt1", "stmt0"), ("stmt2", "stmt1")]: + deps_found = knl.id_to_insn[stmt_id].dependencies + + assert set(deps_found.keys()) == set([dep_id]) + assert len(deps_found[dep_id]) == 1 + + # Check dep + _align_and_compare_maps([(dep_exp, deps_found[dep_id][0])]) + + def test_assignment_to_subst_with_dependencies(): knl = lp.make_kernel( "{[i]: 0 <= i < n}", @@ -1404,9 +1444,10 @@ def test_assignment_to_subst_with_dependencies(): <>temp4 = 5.5*i {id=stmt4,dep=stmt1} """) + # TODO test with multiple subst definition sites # TODO what if stmt2 depends on <>tsq = b[i-1]**2 and then we do # assignment to subst? remove i'=i from dep? - # TODO what if stmt3 doesn't have iname i in it? + # TODO what if, e.g., stmt3 doesn't have iname i in it? knl = lp.add_and_infer_dtypes(knl, {"a": np.float32}) dep_eq = _isl_map_with_marked_dims( @@ -1430,7 +1471,7 @@ def test_assignment_to_subst_with_dependencies(): deps_found = knl.id_to_insn[stmt_id].dependencies # Dep on stmt1 should have been removed - assert list(deps_found.keys()) == ["stmt0"] + assert set(deps_found.keys()) == set(["stmt0"]) assert len(deps_found["stmt0"]) == 1 # Should now depend on stmt0 @@ -1503,6 +1544,8 @@ def test_split_iname_with_dependencies(): lang_version=(2018, 2) ) + # TODO test split_iname 'within' + from copy import deepcopy ref_knl = deepcopy(knl) # deepcopy necessary? 
From 0816d3509a45804a64053547e79fb7e67ccf06b7 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Tue, 13 Apr 2021 05:02:14 -0500 Subject: [PATCH 266/460] in dependency mapper functions, allow for separate matching of depender and dependee statements; create map_dependency_lists(); rename map_stmt_inst_dependency_maps()->map_dependency_maps(); rename map_stmt_inst_dependencies()->map_stmt_dependencies() --- loopy/transform/iname.py | 15 +++++--- loopy/transform/instruction.py | 64 ++++++++++++++++++++++++++++------ loopy/transform/parameter.py | 4 +-- loopy/transform/subst.py | 4 +-- 4 files changed, 68 insertions(+), 19 deletions(-) diff --git a/loopy/transform/iname.py b/loopy/transform/iname.py index ada152624..7cd57ad85 100644 --- a/loopy/transform/iname.py +++ b/loopy/transform/iname.py @@ -257,7 +257,7 @@ def _split_iname_backend(kernel, iname_to_split, # {{{ Split iname in dependencies - from loopy.transform.instruction import map_stmt_inst_dependency_maps + from loopy.transform.instruction import map_dependency_maps from loopy.schedule.checker.schedule import BEFORE_MARK from loopy.schedule.checker.utils import convert_map_to_set @@ -299,7 +299,11 @@ def _split_iname_in_dep(dep): return map_from_set - kernel = map_stmt_inst_dependency_maps(kernel, "id:*", _split_iname_in_dep) + # TODO currently this gets applied to all maps + # instead, handle 'within' + kernel = map_dependency_maps( + kernel, _split_iname_in_dep, + stmt_match_depender="id:*", stmt_match_dependee="id:*") # }}} @@ -893,6 +897,7 @@ def duplicate_inames(kernel, inames, within, new_inames=None, suffix=None, name_gen = kernel.get_var_name_generator() + # Generate new iname names for i, iname in enumerate(inames): new_iname = new_inames[i] @@ -926,6 +931,8 @@ def duplicate_inames(kernel, inames, within, new_inames=None, suffix=None, domains=domch.get_domains_with( duplicate_axes(domch.domain, [old_iname], [new_iname]))) + # TODO For any statements matching 'within', duplicate iname in deps...? 
+ # }}} # {{{ change the inames in the code @@ -1265,7 +1272,7 @@ def remove_unused_inames(kernel, inames=None): # {{{ Remove inames from deps - from loopy.transform.instruction import map_stmt_inst_dependency_maps + from loopy.transform.instruction import map_dependency_maps from loopy.schedule.checker.schedule import BEFORE_MARK from loopy.schedule.checker.utils import append_mark_to_strings unused_inames_marked = append_mark_to_strings(unused_inames, BEFORE_MARK) @@ -1274,7 +1281,7 @@ def _remove_iname_from_dep(dep): return remove_vars_from_set( remove_vars_from_set(dep, unused_inames), unused_inames_marked) - kernel = map_stmt_inst_dependency_maps(kernel, "id:*", _remove_iname_from_dep) + kernel = map_dependency_maps(kernel, _remove_iname_from_dep) # }}} diff --git a/loopy/transform/instruction.py b/loopy/transform/instruction.py index b9d720261..35633089e 100644 --- a/loopy/transform/instruction.py +++ b/loopy/transform/instruction.py @@ -117,10 +117,20 @@ def add_dep(insn): # }}} -# {{{ map_stmt_inst_dependencies +# {{{ map dependencies -def map_stmt_inst_dependencies(kernel, stmt_match, f): +# Terminiology: +# stmtX.dependencies: # <- "stmt dependencies" = full dict of deps +# {stmt0: [dep_map00, dep_map01, ...], # <- "one dependency" +# stmt1: [dep_map10, dep_map11, ...], +# ...} +# one dependency includes one "dependency list", which contains "dep maps" + + +def map_stmt_dependencies(kernel, stmt_match, f): # Set stmt.dependences = f(stmt.dependencies) for stmts matching stmt_match + # Only modifies dependencies for depender! + # Does not search for matching dependees of non-matching depender statements! 
def _update_deps(stmt): new_deps = f(stmt.dependencies) @@ -129,17 +139,49 @@ def _update_deps(stmt): return map_instructions(kernel, stmt_match, _update_deps) -def map_stmt_inst_dependency_maps(kernel, stmt_match, f): - # Set map = f(map) for all dep maps in stmt.dependencies.values() - # for statements matching stmt_match +def map_dependency_lists( + kernel, f, stmt_match_depender="id:*", stmt_match_dependee="id:*"): + # Set dependency = f(dependency) for: + # All deps of stmts matching stmt_match_depender + # All deps ON stmts matching stmt_match_dependee + + from loopy.match import parse_match + match_depender = parse_match(stmt_match_depender) + match_dependee = parse_match(stmt_match_dependee) + + new_stmts = [] - def _update_dep_map(stmt_deps): + for stmt in kernel.instructions: new_deps = {} - for dep_id, deps in stmt_deps.items(): - new_deps[dep_id] = [f(dep) for dep in deps] - return new_deps + if match_depender(kernel, stmt): + # Stmt matches as depender + # Replace all deps + for dep_id, dep_maps in stmt.dependencies.items(): + new_deps[dep_id] = f(dep_maps) + else: + # Stmt didn't match as a depender + # Replace deps matching dependees + for dep_id, dep_maps in stmt.dependencies.items(): + if match_dependee(kernel, kernel.id_to_insn[dep_id]): + new_deps[dep_id] = f(dep_maps) + else: + new_deps[dep_id] = dep_maps + new_stmts.append(stmt.copy(dependencies=new_deps)) + + return kernel.copy(instructions=new_stmts) + + +def map_dependency_maps( + kernel, f, stmt_match_depender="id:*", stmt_match_dependee="id:*"): + # Set dep_map = f(dep_map) for dep_map in: + # All dependencies of stmts matching stmt_match_depender + # All dependencies ON stmts matching stmt_match_dependee + + def _update_dep_maps(dep_maps): + return [f(dep_map) for dep_map in dep_maps] - return map_stmt_inst_dependencies(kernel, stmt_match, _update_dep_map) + return map_dependency_lists( + kernel, _update_dep_maps, stmt_match_depender, stmt_match_dependee) # }}} @@ -166,7 +208,7 @@ def 
_add_dep(stmt_deps): stmt_deps.setdefault(depends_on_id, []).append(new_dependency) return stmt_deps - result = map_stmt_inst_dependencies(kernel, "id:%s" % (stmt_id), _add_dep) + result = map_stmt_dependencies(kernel, "id:%s" % (stmt_id), _add_dep) return result diff --git a/loopy/transform/parameter.py b/loopy/transform/parameter.py index 428ed05e9..5c1c0ce2d 100644 --- a/loopy/transform/parameter.py +++ b/loopy/transform/parameter.py @@ -89,7 +89,7 @@ def process_set(s): # {{{ Fix parameter in deps - from loopy.transform.instruction import map_stmt_inst_dependency_maps + from loopy.transform.instruction import map_dependency_maps from loopy.schedule.checker.utils import convert_map_to_set def _fix_parameter_in_dep(dep): @@ -111,7 +111,7 @@ def _fix_parameter_in_dep(dep): return map_from_set - kernel = map_stmt_inst_dependency_maps(kernel, "id:*", _fix_parameter_in_dep) + kernel = map_dependency_maps(kernel, _fix_parameter_in_dep) # }}} diff --git a/loopy/transform/subst.py b/loopy/transform/subst.py index e4f1cb6ed..e6e146567 100644 --- a/loopy/transform/subst.py +++ b/loopy/transform/subst.py @@ -469,7 +469,7 @@ def get_relevant_definition_insn_id(usage_insn_id): # {{{ update dependencies - from loopy.transform.instruction import map_stmt_inst_dependencies + from loopy.transform.instruction import map_stmt_dependencies # Add dependencies from each subst_def to any statement where its # LHS was found and the subst was performed @@ -492,7 +492,7 @@ def _add_deps_to_stmt(dep_dict): dep_dict.setdefault(depends_on_id, []).extend(dep_list_copy) return dep_dict - kernel = map_stmt_inst_dependencies( + kernel = map_stmt_dependencies( kernel, match_any_matched_usage_id, _add_deps_to_stmt) # }}} From 28c6de6bfbf008c915a2e19ad295ab7f9e36215b Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Wed, 14 Apr 2021 09:16:28 -0500 Subject: [PATCH 267/460] when determining iname domains for schedule map creation for a statement, project out inames except stmt.within_inames --- 
loopy/schedule/checker/schedule.py | 5 +-- test/test_linearization_checker.py | 60 ++++++++++++++++-------------- 2 files changed, 34 insertions(+), 31 deletions(-) diff --git a/loopy/schedule/checker/schedule.py b/loopy/schedule/checker/schedule.py index d9029bfdf..726256b45 100644 --- a/loopy/schedule/checker/schedule.py +++ b/loopy/schedule/checker/schedule.py @@ -743,10 +743,9 @@ def _get_map_for_stmt( stmt_id, lex_points, int_sid, lex_dim_names): # Get inames domain for statement instance (a BasicSet) + within_inames = knl.id_to_insn[stmt_id].within_inames dom = knl.get_inames_domain( - knl.id_to_insn[stmt_id].within_inames) - # (note that this domain may include inames that are - # not in stmt.within_inames) + within_inames).project_out_except(within_inames, [dt.set]) # Create map space (an isl space in current implementation) # {('statement', ) -> diff --git a/test/test_linearization_checker.py b/test/test_linearization_checker.py index d61925414..528f10944 100644 --- a/test/test_linearization_checker.py +++ b/test/test_linearization_checker.py @@ -785,8 +785,12 @@ def test_sios_and_schedules_with_barriers(): # Create expected maps and compare # Iname bound strings to facilitate creation of expected maps - iname_bound_str = "ij_start <= i,j< ij_end" - iname_bound_str_p = "ij_start <= i',j'< ij_end" + i_bound_str = "ij_start <= i < ij_end" + i_bound_str_p = "ij_start <= i' < ij_end" + j_bound_str = "ij_start <= j < ij_end" + j_bound_str_p = "ij_start <= j' < ij_end" + ij_bound_str = i_bound_str + " and " + j_bound_str + ij_bound_str_p = i_bound_str_p + " and " + j_bound_str_p conc_iname_bound_str = "0 <= l0,l1,g0 < lg_end" conc_iname_bound_str_p = "0 <= l0',l1',g0' < lg_end" @@ -802,7 +806,7 @@ def test_sios_and_schedules_with_barriers(): ["2", "i", "2", "j", "1"], # lex points lid_inames=["l0", "l1"], gid_inames=["g0"], ), - iname_bound_str, + ij_bound_str, conc_iname_bound_str, ) ) @@ -829,7 +833,7 @@ def test_sios_and_schedules_with_barriers(): "and {4}" # 
param assumptions "}}".format( STATEMENT_VAR_NAME, - iname_bound_str_p, + ij_bound_str_p, conc_iname_bound_str, conc_iname_bound_str_p, assumptions, @@ -850,7 +854,7 @@ def test_sios_and_schedules_with_barriers(): ["1", "i", "1"], # lex points lid_inames=["l0", "l1"], gid_inames=["g0"], ), - iname_bound_str, + ij_bound_str, conc_iname_bound_str, ) ) @@ -876,7 +880,7 @@ def test_sios_and_schedules_with_barriers(): "and {4}" # param assumptions "}}".format( STATEMENT_VAR_NAME, - iname_bound_str_p, + ij_bound_str_p, conc_iname_bound_str, conc_iname_bound_str_p, assumptions, @@ -975,7 +979,7 @@ def test_sios_and_schedules_with_barriers(): sched_stmt_i0_intra_group_exp = isl.Map( "[ij_start, ij_end, lg_end] -> {" - "[%s=1, i, j, l0, l1, g0] -> [%s] : " + "[%s=1, i, l0, l1, g0] -> [%s] : " "%s and %s}" # iname bounds % ( STATEMENT_VAR_NAME, @@ -983,21 +987,21 @@ def test_sios_and_schedules_with_barriers(): ["2", "i", "0", "0", "0"], # lex points lid_inames=["l0", "l1"], gid_inames=["g0"], ), - iname_bound_str, + i_bound_str, conc_iname_bound_str, ) ) sio_intra_group_exp = _isl_map_with_marked_dims( "[ij_start, ij_end, lg_end] -> {{ " - "[{0}'=0, l0', l1', g0'] -> [{0}=1, i, j, l0, l1, g0] : " + "[{0}'=0, l0', l1', g0'] -> [{0}=1, i, l0, l1, g0] : " "ij_start + 1 <= i < ij_end " # not first iteration of i "and g0 = g0' " # within a single group "and {1} and {2} and {3} " # iname bounds "and {4}" # param assumptions "}}".format( STATEMENT_VAR_NAME, - iname_bound_str, + i_bound_str, conc_iname_bound_str, conc_iname_bound_str_p, assumptions, @@ -1023,7 +1027,7 @@ def test_sios_and_schedules_with_barriers(): sched_stmt_i0_global_exp = isl.Map( "[ij_start, ij_end, lg_end] -> {" - "[%s=1, i, j, l0, l1, g0] -> [%s] : " + "[%s=1, i, l0, l1, g0] -> [%s] : " "%s and %s}" # iname bounds % ( STATEMENT_VAR_NAME, @@ -1031,20 +1035,20 @@ def test_sios_and_schedules_with_barriers(): ["1", "i", "0"], # lex points lid_inames=["l0", "l1"], gid_inames=["g0"], ), - iname_bound_str, + 
i_bound_str, conc_iname_bound_str, ) ) sio_global_exp = _isl_map_with_marked_dims( "[ij_start, ij_end, lg_end] -> {{ " - "[{0}'=0, l0', l1', g0'] -> [{0}=1, i, j, l0, l1, g0] : " + "[{0}'=0, l0', l1', g0'] -> [{0}=1, i, l0, l1, g0] : " "ij_start + 1 <= i < ij_end " # not first iteration of i "and {1} and {2} and {3} " # iname bounds "and {4}" # param assumptions "}}".format( STATEMENT_VAR_NAME, - iname_bound_str, + i_bound_str, conc_iname_bound_str, conc_iname_bound_str_p, assumptions, @@ -1106,8 +1110,8 @@ def test_sios_and_schedules_with_vec_and_barriers(): # Create expected maps and compare # Iname bound strings to facilitate creation of expected maps - iname_bound_str = "0 <= i < 4 and 0 <= j < n" - iname_bound_str_p = "0 <= i' < 4 and 0 <= j' < n" + ij_bound_str = "0 <= i < 4 and 0 <= j < n" + ij_bound_str_p = "0 <= i' < 4 and 0 <= j' < n" conc_iname_bound_str = "0 <= l0 < 32" conc_iname_bound_str_p = "0 <= l0' < 32" @@ -1123,7 +1127,7 @@ def test_sios_and_schedules_with_vec_and_barriers(): ["j", "0"], # lex points (initial matching dim gets removed) lid_inames=["l0"], ), - iname_bound_str, + ij_bound_str, conc_iname_bound_str, ) ) @@ -1138,7 +1142,7 @@ def test_sios_and_schedules_with_vec_and_barriers(): ["j", "1"], # lex points (initial matching dim gets removed) lid_inames=["l0"], ), - iname_bound_str, + ij_bound_str, conc_iname_bound_str, ) ) @@ -1151,8 +1155,8 @@ def test_sios_and_schedules_with_vec_and_barriers(): "and {1} and {2} and {3} and {4}" # iname bounds "}}".format( STATEMENT_VAR_NAME, - iname_bound_str, - iname_bound_str_p, + ij_bound_str, + ij_bound_str_p, conc_iname_bound_str, conc_iname_bound_str_p, ) @@ -1176,7 +1180,7 @@ def test_sios_and_schedules_with_vec_and_barriers(): ["1", "j", "0"], # lex points lid_inames=["l0"], ), - iname_bound_str, + ij_bound_str, conc_iname_bound_str, ) ) @@ -1191,7 +1195,7 @@ def test_sios_and_schedules_with_vec_and_barriers(): ["1", "j", "1"], # lex points lid_inames=["l0"], ), - iname_bound_str, + 
ij_bound_str, conc_iname_bound_str, ) ) @@ -1203,8 +1207,8 @@ def test_sios_and_schedules_with_vec_and_barriers(): "and {1} and {2} and {3} and {4}" # iname bounds "}}".format( STATEMENT_VAR_NAME, - iname_bound_str, - iname_bound_str_p, + ij_bound_str, + ij_bound_str_p, conc_iname_bound_str, conc_iname_bound_str_p, ) @@ -1224,7 +1228,7 @@ def test_sios_and_schedules_with_vec_and_barriers(): ["0"], # lex points lid_inames=["l0"], ), - iname_bound_str, + ij_bound_str, conc_iname_bound_str, ) ) @@ -1240,7 +1244,7 @@ def test_sios_and_schedules_with_vec_and_barriers(): ["0"], # lex points lid_inames=["l0"], ), - iname_bound_str, + ij_bound_str, conc_iname_bound_str, ) ) @@ -1252,8 +1256,8 @@ def test_sios_and_schedules_with_vec_and_barriers(): "and {1} and {2} and {3} and {4}" # iname bounds "}}".format( STATEMENT_VAR_NAME, - iname_bound_str, - iname_bound_str_p, + ij_bound_str, + ij_bound_str_p, conc_iname_bound_str, conc_iname_bound_str_p, ) From f42f2ac5ab66e74b6b72c0de227b1f9cacc0c6e0 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Wed, 14 Apr 2021 09:26:48 -0500 Subject: [PATCH 268/460] switch BEFORE_MARK back to apostrophe now that isl can handle it during pickling --- loopy/schedule/checker/__init__.py | 4 ++-- loopy/schedule/checker/schedule.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/loopy/schedule/checker/__init__.py b/loopy/schedule/checker/__init__.py index 5811f3048..82f9cb5fc 100644 --- a/loopy/schedule/checker/__init__.py +++ b/loopy/schedule/checker/__init__.py @@ -85,8 +85,8 @@ def get_pairwise_statement_orderings( >>> print(str(sio_dict[("stmt_a", "stmt_b")].sio_intra_thread ... 
).replace("{ ", "{\n").replace(" :", "\n:")) [pj, pk] -> { - [_lp_linchk_stmt_ = 0, j_, k_] -> [_lp_linchk_stmt = 1, j, k] - : 0 <= j_ < pj and 0 <= k_ < pk and 0 <= j < pj and 0 <= k < pk } + [_lp_linchk_stmt' = 0, j', k'] -> [_lp_linchk_stmt = 1, j, k] + : 0 <= j' < pj and 0 <= k' < pk and 0 <= j < pj and 0 <= k < pk } """ diff --git a/loopy/schedule/checker/schedule.py b/loopy/schedule/checker/schedule.py index 345257af0..726256b45 100644 --- a/loopy/schedule/checker/schedule.py +++ b/loopy/schedule/checker/schedule.py @@ -72,7 +72,7 @@ for par_level in [0, 1, 2]: LTAG_VAR_NAMES.append("%slid%d" % (LIN_CHECK_IDENTIFIER_PREFIX, par_level)) GTAG_VAR_NAMES.append("%sgid%d" % (LIN_CHECK_IDENTIFIER_PREFIX, par_level)) -BEFORE_MARK = "_" # TODO switch back to apostrophe after islpy is updated +BEFORE_MARK = "'" # }}} From a202ffe2d8caf6ad427f18532391264203d1f641 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Wed, 14 Apr 2021 09:45:37 -0500 Subject: [PATCH 269/460] fix doctest after projecting out unused inames from sched dims --- loopy/schedule/checker/__init__.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/loopy/schedule/checker/__init__.py b/loopy/schedule/checker/__init__.py index 82f9cb5fc..bbea06dca 100644 --- a/loopy/schedule/checker/__init__.py +++ b/loopy/schedule/checker/__init__.py @@ -85,8 +85,8 @@ def get_pairwise_statement_orderings( >>> print(str(sio_dict[("stmt_a", "stmt_b")].sio_intra_thread ... 
).replace("{ ", "{\n").replace(" :", "\n:")) [pj, pk] -> { - [_lp_linchk_stmt' = 0, j', k'] -> [_lp_linchk_stmt = 1, j, k] - : 0 <= j' < pj and 0 <= k' < pk and 0 <= j < pj and 0 <= k < pk } + [_lp_linchk_stmt' = 0, j'] -> [_lp_linchk_stmt = 1, k] + : pj > 0 and pk > 0 and 0 <= j' < pj and 0 <= k < pk } """ From 12a9fc6fc294610a256fc80e09e11f3cfc06ee5a Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Wed, 14 Apr 2021 10:02:23 -0500 Subject: [PATCH 270/460] fix test_split_iname_with_dependencies() after projecting out unused inames from sched dims --- test/test_linearization_checker.py | 28 ++++++++++++++++------------ 1 file changed, 16 insertions(+), 12 deletions(-) diff --git a/test/test_linearization_checker.py b/test/test_linearization_checker.py index 1ad8120de..aa0495ef3 100644 --- a/test/test_linearization_checker.py +++ b/test/test_linearization_checker.py @@ -1613,7 +1613,7 @@ def test_split_iname_with_dependencies(): # {{{ more deps that should be satisfied knl = lp.make_kernel( - ["{[i,j]: 0<=i,j { %s : %s and i' = i and k' = k}" - % (dep_inout_space_str, iname_bounds_str)) - dep2 = _isl_map_with_marked_dims( + % (dep_ik_space_str, ik_bounds_str)) + dep_stmt1_on_stmt0_lt = _isl_map_with_marked_dims( "[p] -> { %s : %s and i' < i and k' < k}" - % (dep_inout_space_str, iname_bounds_str)) - dep3 = _isl_map_with_marked_dims( + % (dep_ik_space_str, ik_bounds_str)) + dep_stmt3_on_stmt2_eq = _isl_map_with_marked_dims( "[p] -> { %s : %s and i' = i and k' = k and j' = j and m' = m}" - % (dep_inout_space_str, iname_bounds_str)) + % (dep_ijkm_space_str, ijkm_bounds_str)) - knl = lp.add_stmt_inst_dependency(knl, "stmt1", "stmt0", dep1) - knl = lp.add_stmt_inst_dependency(knl, "stmt1", "stmt0", dep2) - knl = lp.add_stmt_inst_dependency(knl, "stmt3", "stmt2", dep3) + knl = lp.add_stmt_inst_dependency(knl, "stmt1", "stmt0", dep_stmt1_on_stmt0_eq) + knl = lp.add_stmt_inst_dependency(knl, "stmt1", "stmt0", dep_stmt1_on_stmt0_lt) + knl = lp.add_stmt_inst_dependency(knl, 
"stmt3", "stmt2", dep_stmt3_on_stmt2_eq) # Gratuitous splitting knl = lp.split_iname(knl, "i", 64) From 5206882d0af1a1c7643792fd737282e195060247 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Wed, 14 Apr 2021 11:11:01 -0500 Subject: [PATCH 271/460] add test for dep checking with finite differences dependency example, including barrier handling --- test/test_linearization_checker.py | 121 ++++++++++++++++++++++++++++- 1 file changed, 119 insertions(+), 2 deletions(-) diff --git a/test/test_linearization_checker.py b/test/test_linearization_checker.py index 35009d718..bee9cc988 100644 --- a/test/test_linearization_checker.py +++ b/test/test_linearization_checker.py @@ -1285,7 +1285,7 @@ def test_sios_and_schedules_with_vec_and_barriers(): def test_add_stmt_inst_dependency(): - # Make kernel and use OLD deps to linearize correctly for now + # Make kernel and use OLD deps to control linearization order for now i_range_str = "0 <= i < pi" i_range_str_p = "0 <= i' < pi" assumptions_str = "pi >= 1" @@ -1392,7 +1392,124 @@ def test_add_stmt_inst_dependency(): assert not unsatisfied_deps -# TODO create more kernels with valid/invalid linearizations to test checker +def test_new_dependencies_finite_diff(): + + # Define kernel + knl = lp.make_kernel( + "[nx,nt] -> {[x, t]: 0<=x {{ [{0}'=0, x', t'] -> [{0}=0, x, t] : " + "((x = x' and t = t'+2) or " + " (x'-1 <= x <= x'+1 and t = t' + 1)) and " + "{1} and {2} }}".format( + STATEMENT_VAR_NAME, + xt_range_str, + xt_range_str_p, + )) + knl = lp.add_stmt_inst_dependency(knl, "stmt", "stmt", dep) + + ref_knl = knl + + # {{{ Check with corrct loop nest order + + # Prioritize loops correctly + knl = lp.prioritize_loops(knl, "t,x") + + # Make sure deps are satisfied + proc_knl = preprocess_kernel(knl) + lin_knl = get_one_linearized_kernel(proc_knl) + lin_items = lin_knl.linearization + + unsatisfied_deps = lp.find_unsatisfied_dependencies( + proc_knl, lin_items) + + print(lp.generate_code_v2(lin_knl).device_code()) + assert not 
unsatisfied_deps + + # }}} + # {{{ Check with incorrect loop nest order + + # Now prioritize loops incorrectly + knl = ref_knl + knl = lp.prioritize_loops(knl, "x,t") + + # Make sure unsatisfied deps are caught + proc_knl = preprocess_kernel(knl) + lin_knl = get_one_linearized_kernel(proc_knl) + lin_items = lin_knl.linearization + + unsatisfied_deps = lp.find_unsatisfied_dependencies( + proc_knl, lin_items) + + print(lp.generate_code_v2(lin_knl).device_code()) + assert len(unsatisfied_deps) == 1 + + # }}} + # {{{ Check with parallel x and no barrier + + # Parallelize the x loop + knl = ref_knl + knl = lp.prioritize_loops(knl, "t,x") + knl = lp.tag_inames(knl, "x:l.0") + + # Make sure unsatisfied deps are caught + proc_knl = preprocess_kernel(knl) + lin_knl = get_one_linearized_kernel(proc_knl) + lin_items = lin_knl.linearization + + # Without a barrier, deps not satisfied + # Make sure there is no barrier, and that unsatisfied deps are caught + from loopy.schedule import Barrier + print(lp.generate_code_v2(lin_knl).device_code()) + for lin_item in lin_items: + assert not isinstance(lin_item, Barrier) + + unsatisfied_deps = lp.find_unsatisfied_dependencies( + proc_knl, lin_items) + + assert len(unsatisfied_deps) == 1 + + # }}} + # {{{ Check with parallel x and included barrier + + # Insert a barrier to satisfy deps + knl = lp.make_kernel( + "[nx,nt] -> {[x, t]: 0<=x= 1 and nx >= 1") + # knl = lp.tag_inames(knl, "x_outer:g.0, x_inner:l.0") if __name__ == "__main__": From 167060b823f199a7d621809ddd0d82e1ce3bf4ac Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Thu, 15 Apr 2021 10:35:49 -0500 Subject: [PATCH 272/460] fix doctest --- loopy/schedule/checker/__init__.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/loopy/schedule/checker/__init__.py b/loopy/schedule/checker/__init__.py index 5a492660b..190a29c27 100644 --- a/loopy/schedule/checker/__init__.py +++ b/loopy/schedule/checker/__init__.py @@ -85,8 +85,8 @@ def 
get_pairwise_statement_orderings( >>> print(str(sio_dict[("stmt_a", "stmt_b")].sio_intra_thread ... ).replace("{ ", "{\n").replace(" :", "\n:")) [pj, pk] -> { - [_lp_linchk_stmt' = 0, j', k'] -> [_lp_linchk_stmt = 1, j, k] - : 0 <= j' < pj and 0 <= k' < pk and 0 <= j < pj and 0 <= k < pk } + [_lp_linchk_stmt' = 0, j'] -> [_lp_linchk_stmt = 1, k] + : pj > 0 and pk > 0 and 0 <= j' < pj and 0 <= k < pk } """ From 1b61319212420a93ab37a6af9b1f2163ecbb303d Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Thu, 15 Apr 2021 11:29:14 -0500 Subject: [PATCH 273/460] add docstring for find_unsatisfied_dependencies() --- loopy/schedule/checker/__init__.py | 44 ++++++++++++++++++++++++++++-- 1 file changed, 41 insertions(+), 3 deletions(-) diff --git a/loopy/schedule/checker/__init__.py b/loopy/schedule/checker/__init__.py index bbea06dca..a343a70c9 100644 --- a/loopy/schedule/checker/__init__.py +++ b/loopy/schedule/checker/__init__.py @@ -142,10 +142,48 @@ def get_pairwise_statement_orderings( def find_unsatisfied_dependencies( knl, - linearization_items, + lin_items, ): + """For each statement (:class:`loopy.InstructionBase`) found in a + preprocessed kernel, determine which dependencies, if any, have been + violated by the linearization described by `lin_items`, and return these + dependencies. + + :arg knl: A preprocessed (or linearized) :class:`loopy.kernel.LoopKernel` + containing the statements (:class:`loopy.InstructionBase`) whose + dependencies will be checked against the linearization items. + + :arg lin_items: A list of :class:`loopy.schedule.ScheduleItem` + (to be renamed to `loopy.schedule.LinearizationItem`) containing all + linearization items in `knl.linearization`. To allow usage of + this routine during linearization, a truncated (i.e. partial) + linearization may be passed through this argument. 
- # TODO document + :returns: A list of unsatisfied dependencies, each described using a + :class:`collections.namedtuple` containing the following: + + - `statement_pair`: The (before, after) pair of statement IDs involved + in the dependency. + - `dependency`: An :class:`islpy.Map` from each instance of the first + statement to all instances of the second statement that must occur + later. + - `statement_ordering`: A statement ordering information tuple + resulting from `lp.get_pairwise_statement_orderings`, a + :class:`collections.namedtuple` containing the intra-thread + statement instance ordering (SIO) (`sio_intra_thread`), + intra-group SIO (`sio_intra_group`), and global + SIO (`sio_global`), each realized as an :class:`islpy.Map` from each + instance of the first statement to all instances of the second + statement that occur later, as well as the intra-thread pairwise + schedule (`pwsched_intra_thread`), intra-group pairwise schedule + (`pwsched_intra_group`), and the global pairwise schedule + (`pwsched_global`), each containing a pair of mappings from statement + instances to points in a lexicographic ordering, one for each + statement. Note that a pairwise schedule alone cannot be used to + reproduce the corresponding SIO without the corresponding (unique) + lexicographic order map, which is not returned. 
+ + """ # {{{ make sure kernel has been preprocessed @@ -182,7 +220,7 @@ def find_unsatisfied_dependencies( pworders = get_pairwise_statement_orderings( knl, - linearization_items, + lin_items, stmt_pairs_to_deps.keys(), ) From 00d973735ec3ea0a07275d288d65b8a24ec08d03 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Thu, 15 Apr 2021 11:38:27 -0500 Subject: [PATCH 274/460] add docstring to add_stmt_inst_dependency() --- loopy/transform/instruction.py | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/loopy/transform/instruction.py b/loopy/transform/instruction.py index 703d1d243..73bc9ba13 100644 --- a/loopy/transform/instruction.py +++ b/loopy/transform/instruction.py @@ -121,8 +121,22 @@ def add_stmt_inst_dependency( kernel, stmt_id, depends_on_id, new_dependency): - """Add the statement instance dependency *new_dependency* to statement with - id *stmt_id*. + """Add the statement instance dependency `new_dependency` to the statement with + id `stmt_id`. + + :arg kernel: A :class:`loopy.kernel.LoopKernel`. + + :arg stmt_id: The :class:`str` statement identifier of the statement to + which the dependency will be added. + + :arg depends_on_id: The :class:`str` identifier of the statement that is + depended on, i.e., the statement with statement instances that must + happen before those of `stmt_id`. + + :arg new_dependency: An :class:`islpy.Map` from each instance of the first + statement to all instances of the second statement that must occur + later. 
+ """ if stmt_id not in kernel.id_to_insn: From 887f64e9847cd9b3d2480e6f8d89667f4097f6f4 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Thu, 15 Apr 2021 11:53:39 -0500 Subject: [PATCH 275/460] add dependencies attribute to docstring for InstructionBase; handle dependencies in InstructionBase.get_str_options() --- loopy/kernel/instruction.py | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/loopy/kernel/instruction.py b/loopy/kernel/instruction.py index 951d9975c..7933430ee 100644 --- a/loopy/kernel/instruction.py +++ b/loopy/kernel/instruction.py @@ -75,7 +75,7 @@ class UseStreamingStoreTag(Tag): # {{{ instructions: base class class InstructionBase(ImmutableRecord, Taggable): - """A base class for all types of instruction that can occur in + r"""A base class for all types of instruction that can occur in a kernel. .. attribute:: id @@ -87,7 +87,7 @@ class InstructionBase(ImmutableRecord, Taggable): .. attribute:: depends_on - a :class:`frozenset` of :attr:`id` values of :class:`InstructionBase` + A :class:`frozenset` of :attr:`id` values of :class:`InstructionBase` instances that *must* be executed before this one. Note that :func:`loopy.preprocess_kernel` (usually invoked automatically) augments this by adding dependencies on any writes to temporaries read @@ -106,6 +106,15 @@ class InstructionBase(ImmutableRecord, Taggable): :func:`loopy.make_kernel`. Note, that this is not meant as a user-facing interface. + .. attribute:: dependencies + + A :class:`dict` mapping :attr:`id` values of :class:`InstructionBase` + instances (each referring to a statement with statement instances that + must be executed before instances of this statement) to lists (one list + per key) of class:`islpy.Map`\ s mapping each instance of the dependee + statement to all instances of this statement that must occur later. Note + that this dict will eventually replace the `depends_on` attribute. + .. 
attribute:: depends_on_is_final A :class:`bool` determining whether :attr:`depends_on` constitutes @@ -393,7 +402,8 @@ def get_str_options(self): if self.depends_on: result.append("dep="+":".join(self.depends_on)) - # TODO something with dependencies? + if self.dependencies: + result.append("dependencies="+":".join(self.dependencies.keys())) if self.no_sync_with: result.append("nosync="+":".join( "%s@%s" % entry for entry in self.no_sync_with)) From 5bc848887a6ec0ada34bbb684cf6c763e0ed9fc6 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Thu, 15 Apr 2021 17:09:42 -0500 Subject: [PATCH 276/460] only parse_match if it hasn't already been called --- loopy/transform/instruction.py | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/loopy/transform/instruction.py b/loopy/transform/instruction.py index c6bd8b95f..7be058032 100644 --- a/loopy/transform/instruction.py +++ b/loopy/transform/instruction.py @@ -139,15 +139,23 @@ def _update_deps(stmt): return map_instructions(kernel, stmt_match, _update_deps) +def _parse_match_if_necessary(match_candidate): + from loopy.match import MatchExpressionBase + if not isinstance(match_candidate, MatchExpressionBase): + from loopy.match import parse_match + return parse_match(match_candidate) + else: + return match_candidate + + def map_dependency_lists( kernel, f, stmt_match_depender="id:*", stmt_match_dependee="id:*"): # Set dependency = f(dependency) for: # All deps of stmts matching stmt_match_depender # All deps ON stmts matching stmt_match_dependee - from loopy.match import parse_match - match_depender = parse_match(stmt_match_depender) - match_dependee = parse_match(stmt_match_dependee) + match_depender = _parse_match_if_necessary(stmt_match_depender) + match_dependee = _parse_match_if_necessary(stmt_match_dependee) new_stmts = [] From 91378b527eec9bbc8f3d98775a1d2cbc62c08cf4 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Thu, 15 Apr 2021 17:10:19 -0500 Subject: [PATCH 277/460] update dependencies 
when duplicate_inames() is called --- loopy/transform/iname.py | 45 +++++++++++++++++++++++++++++++++++----- 1 file changed, 40 insertions(+), 5 deletions(-) diff --git a/loopy/transform/iname.py b/loopy/transform/iname.py index 5e4fdb724..8c4f19e9d 100644 --- a/loopy/transform/iname.py +++ b/loopy/transform/iname.py @@ -268,6 +268,9 @@ def _split_iname_in_dep(dep): if iname_to_split not in dep.get_var_names(dt.out): return dep + # TODO dep in-dims may not match dep out-dims, need to check for iname + # in dt.in as well!!!! + # Temporarily convert map to set for processing set_from_map, n_in_dims, n_out_dims = convert_map_to_set(dep) @@ -889,7 +892,7 @@ def duplicate_inames(kernel, inames, within, new_inames=None, suffix=None, new_inames = [iname.strip() for iname in new_inames.split(",")] from loopy.match import parse_stack_match - within = parse_stack_match(within) + within_sm = parse_stack_match(within) if new_inames is None: new_inames = [None] * len(inames) @@ -922,7 +925,7 @@ def duplicate_inames(kernel, inames, within, new_inames=None, suffix=None, # }}} - # {{{ duplicate the inames + # {{{ duplicate the inames in domains for old_iname, new_iname in zip(inames, new_inames): from loopy.kernel.tools import DomainChanger @@ -933,7 +936,39 @@ def duplicate_inames(kernel, inames, within, new_inames=None, suffix=None, domains=domch.get_domains_with( duplicate_axes(domch.domain, [old_iname], [new_iname]))) - # TODO For any statements matching 'within', duplicate iname in deps...? 
+ # {{{ *Rename* iname in dependencies + + from loopy.transform.instruction import map_dependency_maps + from loopy.schedule.checker.schedule import BEFORE_MARK + dt = isl.dim_type + old_iname_p = old_iname+BEFORE_MARK + new_iname_p = new_iname+BEFORE_MARK + + def _rename_iname_in_dep_out(dep): + # update iname in out-dim + out_idx = dep.find_dim_by_name(dt.out, old_iname) + if out_idx != -1: + dep = dep.set_dim_name(dt.out, out_idx, new_iname) + return dep + + def _rename_iname_in_dep_in(dep): + # update iname in in-dim + in_idx = dep.find_dim_by_name(dt.in_, old_iname_p) + if in_idx != -1: + dep = dep.set_dim_name(dt.in_, in_idx, new_iname_p) + return dep + + # TODO figure out proper way to match none + # TODO figure out match vs stack_match + false_id_match = "id:false and (not id:false)" + kernel = map_dependency_maps( + kernel, _rename_iname_in_dep_out, + stmt_match_depender=within, stmt_match_dependee=false_id_match) + kernel = map_dependency_maps( + kernel, _rename_iname_in_dep_in, + stmt_match_depender=false_id_match, stmt_match_dependee=within) + + # }}} # }}} @@ -943,10 +978,10 @@ def duplicate_inames(kernel, inames, within, new_inames=None, suffix=None, kernel.substitutions, name_gen) indup = _InameDuplicator(rule_mapping_context, old_to_new=dict(list(zip(inames, new_inames))), - within=within) + within=within_sm) kernel = rule_mapping_context.finish_kernel( - indup.map_kernel(kernel, within=within)) + indup.map_kernel(kernel, within=within_sm)) # }}} From 83a05f3aa4936cbeb70fe2fac0f1a14764d9ccae Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Thu, 15 Apr 2021 17:10:56 -0500 Subject: [PATCH 278/460] test handling of deps during duplicate_inames --- test/test_linearization_checker.py | 101 +++++++++++++++++++++++++++++ 1 file changed, 101 insertions(+) diff --git a/test/test_linearization_checker.py b/test/test_linearization_checker.py index fc3e5c3d5..ed3a5442f 100644 --- a/test/test_linearization_checker.py +++ b/test/test_linearization_checker.py @@ 
-1392,6 +1392,8 @@ def test_add_stmt_inst_dependency(): assert not unsatisfied_deps +# {{{ test_new_dependencies_finite_diff + def test_new_dependencies_finite_diff(): # Define kernel @@ -1511,9 +1513,13 @@ def test_new_dependencies_finite_diff(): # knl = lp.assume(knl, "nx % 14 = 0 and nt >= 1 and nx >= 1") # knl = lp.tag_inames(knl, "x_outer:g.0, x_inner:l.0") +# }}} + # {{{ Check dependency handling during transformations +# {{{ test_fix_parameters_with_dependencies + def test_fix_parameters_with_dependencies(): knl = lp.make_kernel( "{[i,j]: 0 <= i < n and 0 <= j < m}", @@ -1553,6 +1559,10 @@ def test_fix_parameters_with_dependencies(): # Check dep _align_and_compare_maps([(dep_exp, deps_found[dep_id][0])]) +# }}} + + +# {{{ test_assignment_to_subst_with_dependencies def test_assignment_to_subst_with_dependencies(): knl = lp.make_kernel( @@ -1652,6 +1662,95 @@ def test_assignment_to_subst_with_dependencies(): # Should still depend on stmt1 _align_and_compare_maps([(dep_eq, deps_found["stmt1"][0])]) +# }}} + + +# {{{ test_duplicate_inames_with_dependencies + +def test_duplicate_inames_with_dependencies(): + + knl = lp.make_kernel( + "{[i,j]: 0 <= i,j < n}", + """ + b[i,j] = a[i,j] {id=stmtb} + c[i,j] = a[i,j] {id=stmtc,dep=stmtb} + """) + knl = lp.add_and_infer_dtypes(knl, {"a": np.float32}) + + dep_eq = _isl_map_with_marked_dims( + "[n] -> {{ [{0}'=0, i', j']->[{0}=1, i, j] : " + "0 <= i,i',j,j' < n and i' = i and j' = j" + "}}".format(STATEMENT_VAR_NAME)) + + # Create dep stmtb->stmtc + knl = lp.add_stmt_inst_dependency(knl, "stmtc", "stmtb", dep_eq) + + ref_knl = knl + + def _check_deps(transformed_knl, c_dep_exp): + b_deps = transformed_knl.id_to_insn["stmtb"].dependencies + c_deps = transformed_knl.id_to_insn["stmtc"].dependencies + + assert not b_deps + assert len(c_deps) == 1 + assert len(c_deps["stmtb"]) == 1 + _align_and_compare_maps([(c_deps["stmtb"][0], c_dep_exp)]) + + # Check dep satisfaction + proc_knl = preprocess_kernel(transformed_knl) + lin_knl 
= get_one_linearized_kernel(proc_knl) + lin_items = lin_knl.linearization + unsatisfied_deps = lp.find_unsatisfied_dependencies( + proc_knl, lin_items) + + assert not unsatisfied_deps + + # {{{ Duplicate j within stmtc + + knl = lp.duplicate_inames(knl, ["j"], within="id:stmtc", new_inames=["j_new"]) + + dep_exp = _isl_map_with_marked_dims( + "[n] -> {{ [{0}'=0, i', j']->[{0}=1, i, j_new] : " + "0 <= i,i',j_new,j' < n and i' = i and j' = j_new" + "}}".format(STATEMENT_VAR_NAME)) + + _check_deps(knl, dep_exp) + + # }}} + + # {{{ Duplicate j within stmtb + + knl = ref_knl + knl = lp.duplicate_inames(knl, ["j"], within="id:stmtb", new_inames=["j_new"]) + + dep_exp = _isl_map_with_marked_dims( + "[n] -> {{ [{0}'=0, i', j_new']->[{0}=1, i, j] : " + "0 <= i,i',j,j_new' < n and i' = i and j_new' = j" + "}}".format(STATEMENT_VAR_NAME)) + + _check_deps(knl, dep_exp) + + # }}} + + # {{{ Duplicate j within stmtb and stmtc + + knl = ref_knl + knl = lp.duplicate_inames( + knl, ["j"], within="id:stmtb or id:stmtc", new_inames=["j_new"]) + + dep_exp = _isl_map_with_marked_dims( + "[n] -> {{ [{0}'=0, i', j_new']->[{0}=1, i, j_new] : " + "0 <= i,i',j_new,j_new' < n and i' = i and j_new' = j_new" + "}}".format(STATEMENT_VAR_NAME)) + + _check_deps(knl, dep_exp) + + # }}} + +# }}} + + +# {{{ def test_split_iname_with_dependencies def test_split_iname_with_dependencies(): knl = lp.make_kernel( @@ -1791,6 +1890,8 @@ def test_split_iname_with_dependencies(): # }}} +# }}} + if __name__ == "__main__": if len(sys.argv) > 1: From f07e77027aee05ef136b4ecdbb4445dcaa073dd9 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Thu, 15 Apr 2021 19:25:39 -0500 Subject: [PATCH 279/460] create remove_dim_by_name() func --- loopy/schedule/checker/utils.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/loopy/schedule/checker/utils.py b/loopy/schedule/checker/utils.py index 8f72596b6..364ad0c8e 100644 --- a/loopy/schedule/checker/utils.py +++ b/loopy/schedule/checker/utils.py @@ -84,6 +84,13 @@ def 
reorder_dims_by_name( return new_set +def remove_dim_by_name(isl_map, dim_type, dim_name): + idx = isl_map.find_dim_by_name(dim_type, dim_name) + if idx == -1: + raise ValueError("Dim '%s' not found. Cannot remove dim.") + return isl_map.remove_dims(dim_type, idx, 1) + + def ensure_dim_names_match_and_align(obj_map, tgt_map): # first make sure names match From 1578bc0dab8321280bad01b9fb1368a42209133f Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Thu, 15 Apr 2021 19:27:04 -0500 Subject: [PATCH 280/460] handle dependencies correctly during split_iname when 'within' is not None --- loopy/transform/iname.py | 76 ++++++++++++++++++++++++++-------------- 1 file changed, 50 insertions(+), 26 deletions(-) diff --git a/loopy/transform/iname.py b/loopy/transform/iname.py index 8c4f19e9d..bf6a2ca54 100644 --- a/loopy/transform/iname.py +++ b/loopy/transform/iname.py @@ -259,54 +259,78 @@ def _split_iname_backend(kernel, iname_to_split, from loopy.transform.instruction import map_dependency_maps from loopy.schedule.checker.schedule import BEFORE_MARK - from loopy.schedule.checker.utils import convert_map_to_set + from loopy.schedule.checker.utils import ( + convert_map_to_set, + remove_dim_by_name, + ) + dt = isl.dim_type - def _split_iname_in_dep(dep): - dt = isl.dim_type + def _split_iname_in_depender(dep): # If iname is not present in dep, return unmodified dep if iname_to_split not in dep.get_var_names(dt.out): return dep - # TODO dep in-dims may not match dep out-dims, need to check for iname - # in dt.in as well!!!! 
- # Temporarily convert map to set for processing set_from_map, n_in_dims, n_out_dims = convert_map_to_set(dep) # Split iname set_from_map = _split_iname_in_set( - set_from_map, - iname_to_split, - inner_iname, - outer_iname, - fixed_length, - fixed_length_is_inner) + set_from_map, iname_to_split, inner_iname, outer_iname, + fixed_length, fixed_length_is_inner) + + # Dim order: [old_inames' ..., old_inames ..., i_outer, i_inner] + + # Convert set back to map + map_from_set = isl.Map.from_domain(set_from_map) + # Move original out dims + 2 new dims: + map_from_set = map_from_set.move_dims( + dt.out, 0, dt.in_, n_in_dims, n_out_dims+2) + + # Remove iname that was split: + map_from_set = remove_dim_by_name( + map_from_set, dt.out, iname_to_split) + + return map_from_set + + def _split_iname_in_dependee(dep): + + iname_to_split_marked = iname_to_split+BEFORE_MARK + + # If iname is not present in dep, return unmodified dep + if iname_to_split_marked not in dep.get_var_names(dt.in_): + return dep + + # Temporarily convert map to set for processing + set_from_map, n_in_dims, n_out_dims = convert_map_to_set(dep) + # Split iname' set_from_map = _split_iname_in_set( - set_from_map, - iname_to_split+BEFORE_MARK, - inner_iname+BEFORE_MARK, - outer_iname+BEFORE_MARK, - fixed_length, - fixed_length_is_inner) + set_from_map, iname_to_split_marked, + inner_iname+BEFORE_MARK, outer_iname+BEFORE_MARK, + fixed_length, fixed_length_is_inner) - # Now set dims look like - # [old_inames' ..., old_inames ..., i_outer, i_inner, i_outer', i_inner'] + # Dim order: [old_inames' ..., old_inames ..., i_outer', i_inner'] # Convert set back to map map_from_set = isl.Map.from_domain(set_from_map) - # move original out dims + 2 new dims: + # Move original out dims new dims: map_from_set = map_from_set.move_dims( - dt.out, 0, dt.in_, n_in_dims, n_out_dims+2) + dt.out, 0, dt.in_, n_in_dims, n_out_dims) + + # Remove iname that was split: + map_from_set = remove_dim_by_name( + map_from_set, dt.in_, 
iname_to_split_marked) return map_from_set - # TODO currently this gets applied to all maps - # instead, handle 'within' + false_id_match = "id:false and (not id:false)" + kernel = map_dependency_maps( + kernel, _split_iname_in_depender, + stmt_match_depender=within, stmt_match_dependee=false_id_match) kernel = map_dependency_maps( - kernel, _split_iname_in_dep, - stmt_match_depender="id:*", stmt_match_dependee="id:*") + kernel, _split_iname_in_dependee, + stmt_match_depender=false_id_match, stmt_match_dependee=within) # }}} From 06fdc7c95260cff07e097b1f9df578d534507856 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Thu, 15 Apr 2021 19:27:34 -0500 Subject: [PATCH 281/460] test handling of dependencies during split_iname when 'within' is not None --- test/test_linearization_checker.py | 96 +++++++++++++++++++++--------- 1 file changed, 69 insertions(+), 27 deletions(-) diff --git a/test/test_linearization_checker.py b/test/test_linearization_checker.py index ed3a5442f..6845b615e 100644 --- a/test/test_linearization_checker.py +++ b/test/test_linearization_checker.py @@ -1764,19 +1764,35 @@ def test_split_iname_with_dependencies(): lang_version=(2018, 2) ) - # TODO test split_iname 'within' - from copy import deepcopy - ref_knl = deepcopy(knl) # deepcopy necessary? 
+ ref_knl = deepcopy(knl) # without deepcopy, deps get applied to ref_knl + + def _check_deps(transformed_knl, stmt1_dep_exp, len_unsatisfied_deps=0): + stmt0_deps = transformed_knl.id_to_insn["stmt0"].dependencies + stmt1_deps = transformed_knl.id_to_insn["stmt1"].dependencies + + assert not stmt0_deps + assert len(stmt1_deps) == 1 + assert len(stmt1_deps["stmt0"]) == 1 + _align_and_compare_maps([(stmt1_deps["stmt0"][0], stmt1_dep_exp)]) + + # Check dep satisfaction + proc_knl = preprocess_kernel(transformed_knl) + lin_knl = get_one_linearized_kernel(proc_knl) + lin_items = lin_knl.linearization + unsatisfied_deps = lp.find_unsatisfied_dependencies( + proc_knl, lin_items) + + assert len(unsatisfied_deps) == len_unsatisfied_deps - # {{{ dep that should be satisfied + # {{{ Split iname and make sure dep is correct dep_inout_space_str = "[{0}'=0, i'] -> [{0}=1, i]".format(STATEMENT_VAR_NAME) - dep = _isl_map_with_marked_dims( + dep_satisfied = _isl_map_with_marked_dims( "[p] -> { %s : 0 <= i < p and i' = i }" % (dep_inout_space_str)) - knl = lp.add_stmt_inst_dependency(knl, "stmt1", "stmt0", dep) + knl = lp.add_stmt_inst_dependency(knl, "stmt1", "stmt0", dep_satisfied) knl = lp.split_iname(knl, "i", 32) dep_exp = _isl_map_with_marked_dims( @@ -1786,26 +1802,52 @@ def test_split_iname_with_dependencies(): " and 0 <= 32*i_outer' + i_inner' < p" # transformed bounds (0 <= i' < p) " and i_inner + 32*i_outer = 32*i_outer' + i_inner'" # i = i' "}}".format(STATEMENT_VAR_NAME)) - dep_found = knl.id_to_insn["stmt1"].dependencies["stmt0"][0] - # make sure dep is correct - _align_and_compare_maps([(dep_exp, dep_found)]) + _check_deps(knl, dep_exp) - # Get a linearization - proc_knl = preprocess_kernel(knl) - lin_knl = get_one_linearized_kernel(proc_knl) - lin_items = lin_knl.linearization + # }}} - unsatisfied_deps = lp.find_unsatisfied_dependencies( - proc_knl, lin_items) + # {{{ Split iname within stmt1 and make sure dep is correct - assert not unsatisfied_deps + knl = 
deepcopy(ref_knl) + + knl = lp.add_stmt_inst_dependency(knl, "stmt1", "stmt0", dep_satisfied) + knl = lp.split_iname(knl, "i", 32, within="id:stmt1") + + dep_exp = _isl_map_with_marked_dims( + "[p] -> {{ [{0}'=0, i'] -> [{0}=1, i_outer, i_inner] : " + "0 <= i_inner < 32" # new bounds + " and 0 <= 32*i_outer + i_inner < p" # transformed bounds (0 <= i < p) + " and 0 <= i' < p" # original bounds + " and i_inner + 32*i_outer = i'" # transform {i = i'} + "}}".format(STATEMENT_VAR_NAME)) + + _check_deps(knl, dep_exp) # }}} - # {{{ dep that should not be satisfied + # {{{ Split iname within stmt0 and make sure dep is correct - knl = ref_knl + knl = deepcopy(ref_knl) + + knl = lp.add_stmt_inst_dependency(knl, "stmt1", "stmt0", dep_satisfied) + knl = lp.split_iname(knl, "i", 32, within="id:stmt0") + + dep_exp = _isl_map_with_marked_dims( + "[p] -> {{ [{0}'=0, i_outer', i_inner'] -> [{0}=1, i] : " + "0 <= i_inner' < 32" # new bounds + " and 0 <= i < p" # original bounds + " and 0 <= 32*i_outer' + i_inner' < p" # transformed bounds (0 <= i' < p) + " and i = 32*i_outer' + i_inner'" # transform {i = i'} + "}}".format(STATEMENT_VAR_NAME)) + + _check_deps(knl, dep_exp) + + # }}} + + # {{{ Check dep that should not be satisfied + + knl = deepcopy(ref_knl) dep_unsatisfied = _isl_map_with_marked_dims( "[p] -> { %s : 0 <= i < p and i' = i + 1 }" @@ -1814,19 +1856,19 @@ def test_split_iname_with_dependencies(): knl = lp.add_stmt_inst_dependency(knl, "stmt1", "stmt0", dep_unsatisfied) knl = lp.split_iname(knl, "i", 32) - # Get a linearization - proc_knl = preprocess_kernel(knl) - lin_knl = get_one_linearized_kernel(proc_knl) - lin_items = lin_knl.linearization - - unsatisfied_deps = lp.find_unsatisfied_dependencies( - proc_knl, lin_items) + dep_exp = _isl_map_with_marked_dims( + "[p] -> {{ [{0}'=0, i_outer', i_inner'] -> [{0}=1, i_outer, i_inner] : " + "0 <= i_inner, i_inner' < 32" # new bounds + " and 0 <= 32*i_outer + i_inner < p" # transformed bounds (0 <= i < p) + " and 0 <= 
32*i_outer' + i_inner' - 1 < p" # trans. bounds (0 <= i'-1 < p) + " and i_inner + 32*i_outer + 1 = 32*i_outer' + i_inner'" # i' = i + 1 + "}}".format(STATEMENT_VAR_NAME)) - assert len(unsatisfied_deps) == 1 + _check_deps(knl, dep_exp, len_unsatisfied_deps=1) # }}} - # {{{ more deps that should be satisfied + # {{{ Deps that should be satisfied after gratuitous splitting knl = lp.make_kernel( "{[i,j,k,m]: 0<=i,j,k,m Date: Thu, 15 Apr 2021 20:18:36 -0500 Subject: [PATCH 282/460] one more foldmarker --- test/test_linearization_checker.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/test/test_linearization_checker.py b/test/test_linearization_checker.py index 6845b615e..72b9714a9 100644 --- a/test/test_linearization_checker.py +++ b/test/test_linearization_checker.py @@ -1283,6 +1283,8 @@ def test_sios_and_schedules_with_vec_and_barriers(): # }}} +# {{{ test_add_stmt_inst_dependency + def test_add_stmt_inst_dependency(): # Make kernel and use OLD deps to control linearization order for now @@ -1391,6 +1393,8 @@ def test_add_stmt_inst_dependency(): assert not unsatisfied_deps +# }}} + # {{{ test_new_dependencies_finite_diff From d7b47dd6dd297259eb4072d3ba61c9e34ba72292 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Thu, 15 Apr 2021 20:18:57 -0500 Subject: [PATCH 283/460] clarification comment in map_dependency_lists --- loopy/transform/instruction.py | 1 + 1 file changed, 1 insertion(+) diff --git a/loopy/transform/instruction.py b/loopy/transform/instruction.py index 7be058032..c8fd60447 100644 --- a/loopy/transform/instruction.py +++ b/loopy/transform/instruction.py @@ -153,6 +153,7 @@ def map_dependency_lists( # Set dependency = f(dependency) for: # All deps of stmts matching stmt_match_depender # All deps ON stmts matching stmt_match_dependee + # (but doesn't call f() twice if dep matches both depender and dependee) match_depender = _parse_match_if_necessary(stmt_match_depender) match_dependee = _parse_match_if_necessary(stmt_match_dependee) From 
e181009a585af833358fe17b44b0e2511775b989 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Fri, 16 Apr 2021 12:08:35 -0500 Subject: [PATCH 284/460] add TODO --- loopy/transform/iname.py | 1 + 1 file changed, 1 insertion(+) diff --git a/loopy/transform/iname.py b/loopy/transform/iname.py index bf6a2ca54..40059ccaa 100644 --- a/loopy/transform/iname.py +++ b/loopy/transform/iname.py @@ -324,6 +324,7 @@ def _split_iname_in_dependee(dep): return map_from_set + # TODO figure out proper way to create false match condition false_id_match = "id:false and (not id:false)" kernel = map_dependency_maps( kernel, _split_iname_in_depender, From af7c26315340afc0d32ba64388383dd5d930ffa9 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Sat, 17 Apr 2021 15:16:35 -0500 Subject: [PATCH 285/460] Before adding each parallel iname constraint to a statement's schedule, make sure the iname applies to this statement. (Even though all parallel thread dims are active throughout the whole kernel, they may be assigned (tagged) to one iname for one subset of statements and another iname for a different subset of statements.); Add test with parallel matmul. --- loopy/schedule/checker/schedule.py | 19 ++++++---- test/test_linearization_checker.py | 57 ++++++++++++++++++++++++++++++ 2 files changed, 70 insertions(+), 6 deletions(-) diff --git a/loopy/schedule/checker/schedule.py b/loopy/schedule/checker/schedule.py index 726256b45..05aea3bdf 100644 --- a/loopy/schedule/checker/schedule.py +++ b/loopy/schedule/checker/schedule.py @@ -366,14 +366,14 @@ def get_pairwise_statement_orderings_inner( # name in schedules, i.e., i = lid0, j = lid1, etc. 
lid_lex_dim_names = set() gid_lex_dim_names = set() - par_iname_constraint_dicts = [] + par_iname_constraint_dicts = {} for iname in knl.all_inames(): ltag = knl.iname_tags_of_type(iname, LocalIndexTag) if ltag: assert len(ltag) == 1 # (should always be true) ltag_var = LTAG_VAR_NAMES[ltag.pop().axis] lid_lex_dim_names.add(ltag_var) - par_iname_constraint_dicts.append({1: 0, iname: 1, ltag_var: -1}) + par_iname_constraint_dicts[iname] = {1: 0, iname: 1, ltag_var: -1} continue # Shouldn't be any GroupIndexTags @@ -382,7 +382,7 @@ def get_pairwise_statement_orderings_inner( assert len(gtag) == 1 # (should always be true) gtag_var = GTAG_VAR_NAMES[gtag.pop().axis] gid_lex_dim_names.add(gtag_var) - par_iname_constraint_dicts.append({1: 0, iname: 1, gtag_var: -1}) + par_iname_constraint_dicts[iname] = {1: 0, iname: 1, gtag_var: -1} # Sort for consistent dimension ordering lid_lex_dim_names = sorted(lid_lex_dim_names) @@ -786,9 +786,16 @@ def _get_map_for_stmt( ) # Set inames equal to relevant gid/lid var names - for constraint_dict in par_iname_constraint_dicts: - sched_map = sched_map.add_constraint( - isl.Constraint.eq_from_names(sched_map.space, constraint_dict)) + for iname, constraint_dict in par_iname_constraint_dicts.items(): + # Even though all parallel thread dims are active throughout the + # whole kernel, they may be assigned (tagged) to one iname for some + # subset of statements and another iname for a different subset of + # statements (e.g., tiled, paralle. matmul). 
+ # So before adding each parallel iname constraint, make sure the + # iname applies to this statement: + if iname in dom_inames_ordered: + sched_map = sched_map.add_constraint( + isl.Constraint.eq_from_names(sched_map.space, constraint_dict)) return sched_map diff --git a/test/test_linearization_checker.py b/test/test_linearization_checker.py index 528f10944..060e3326d 100644 --- a/test/test_linearization_checker.py +++ b/test/test_linearization_checker.py @@ -1283,6 +1283,63 @@ def test_sios_and_schedules_with_vec_and_barriers(): # }}} +# {{{ test_sios_with_matmul + +def test_sios_with_matmul(): + from loopy.schedule.checker import ( + get_pairwise_statement_orderings, + ) + # For now, this test just ensures all pairwise SIOs can be created + # for a complex parallel kernel without any errors/exceptions. Later PRs + # will examine this kernel's SIOs and related dependencies for accuracy. + + bsize = 16 + knl = lp.make_kernel( + "{[i,k,j]: 0<=i 1: exec(sys.argv[1]) From 47967a3698622f981aa1b210d18ab69705a2f232 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Sat, 17 Apr 2021 21:30:50 -0500 Subject: [PATCH 286/460] add test for dep updating during map_domain (even though map_domain doesn't update deps yet) --- test/test_linearization_checker.py | 129 +++++++++++++++++++++++++++++ 1 file changed, 129 insertions(+) diff --git a/test/test_linearization_checker.py b/test/test_linearization_checker.py index f7ddf3dd6..631e8af71 100644 --- a/test/test_linearization_checker.py +++ b/test/test_linearization_checker.py @@ -1999,6 +1999,135 @@ def _check_deps(transformed_knl, stmt1_dep_exp, len_unsatisfied_deps=0): # }}} + +# {{{ test_map_domain_with_dependencies + +def test_map_domain_with_dependencies(): + + # {{{ make kernel + + ref_knl = lp.make_kernel( + "[nx,nt] -> {[ix, it]: 1<=ix {{" + "[{0}' = 0, ix', it'] -> [{0} = 0, ix, it = 1 + it'] : " + "0 < ix' <= -2 + nx and 0 <= it' <= -2 + nt and ix >= -1 + ix' and " + "0 < ix <= 1 + ix' and ix <= -2 + nx; " + "[statement' = 
0, ix', it'] -> [statement = 0, ix = ix', it = 2 + it'] : " + "0 < ix' <= -2 + nx and 0 <= it' <= -3 + nt" + "}}".format(STATEMENT_VAR_NAME)) + + knl = lp.add_stmt_inst_dependency( + knl, stmt_after, stmt_before, dep_map) + + # Get a linearization + proc_knl = lp.preprocess_kernel(knl) + lin_knl = lp.get_one_linearized_kernel(proc_knl) + + # Check dependencies + dep_found = proc_knl.id_to_insn[stmt_after].dependencies[stmt_before][0] + assert dep_found.get_var_dict() == dep_map.get_var_dict() + assert dep_found == dep_map + unsatisfied_deps = lp.find_unsatisfied_dependencies( + proc_knl, lin_knl.linearization) + assert not unsatisfied_deps + + # }}} + + # {{{ Check dependency after domain change mapping + + knl = deepcopy(ref_knl) # without deepcopy, deps will be added to ref_knl + + # Add dependency (TODO add dep here once map_domains updates deps correctly) + #knl = lp.add_stmt_inst_dependency( + # knl, stmt_after, stmt_before, dep_map) + + # Create map_domain mapping: + transform_map = isl.BasicMap( + "[nx,nt] -> {[ix, it] -> [tx, tt, tparity, itt, itx]: " + "16*(tx - tt) + itx - itt = ix - it and " + "16*(tx + tt + tparity) + itt + itx = ix + it and " + "0<=tparity<2 and 0 <= itx - itt < 16 and 0 <= itt+itx < 16}") + + # Call map_domain to transform kernel + knl = lp.map_domain(knl, transform_map) + + # Prioritize loops (prio should eventually be updated in map_domain?) 
+ knl = lp.prioritize_loops(knl, "tt,tparity,tx,itt,itx") + + # {{{ Manually apply transform map to dependency and add it to knl + + # NOTE: This will later occur inside map_domain when dependencies are updated + # during transformation, and this test will be updated accordingly + + # Prep transform map to be applied to dependency + from loopy.schedule.checker.utils import ( + insert_and_name_isl_dims, + add_eq_isl_constraint_from_names, + append_mark_to_isl_map_var_names, + ) + dt = isl.dim_type + # Insert 'statement' dim into transform map + transform_map = insert_and_name_isl_dims( + transform_map, dt.in_, [STATEMENT_VAR_NAME+BEFORE_MARK], 0) + transform_map = insert_and_name_isl_dims( + transform_map, dt.out, [STATEMENT_VAR_NAME], 0) + # Add stmt = stmt' constraint + transform_map = add_eq_isl_constraint_from_names( + transform_map, STATEMENT_VAR_NAME, STATEMENT_VAR_NAME+BEFORE_MARK) + + # Apply transform map to dependency + mapped_dep_map = dep_map.apply_range(transform_map).apply_domain(transform_map) + mapped_dep_map = append_mark_to_isl_map_var_names( + mapped_dep_map, dt.in_, BEFORE_MARK) + + # Add dep to kernel + knl = lp.add_stmt_inst_dependency( + knl, stmt_after, stmt_before, mapped_dep_map) + + # }}} + + # Get a linearization + proc_knl = lp.preprocess_kernel(knl) + lin_knl = lp.get_one_linearized_kernel(proc_knl) + + # Check dependencies + dep_found = proc_knl.id_to_insn[stmt_after].dependencies[stmt_before][0] + assert dep_found.get_var_dict() == mapped_dep_map.get_var_dict() + assert dep_found == mapped_dep_map + unsatisfied_deps = lp.find_unsatisfied_dependencies( + proc_knl, lin_knl.linearization) + + assert not unsatisfied_deps + + # }}} + +# }}} + # }}} # }}} From aef8febb989992ef4c7f849d4839ec40c8253698 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Sun, 18 Apr 2021 15:07:01 -0500 Subject: [PATCH 287/460] add foldmarker --- loopy/transform/iname.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/loopy/transform/iname.py 
b/loopy/transform/iname.py index 4a38920fd..98e04be04 100644 --- a/loopy/transform/iname.py +++ b/loopy/transform/iname.py @@ -2176,6 +2176,8 @@ def process_set(s): # }}} +# {{{ add_inames_for_unused_hw_axes + def add_inames_for_unused_hw_axes(kernel, within=None): """ Returns a kernel with inames added to each instruction @@ -2285,4 +2287,6 @@ def add_inames_for_unused_hw_axes(kernel, within=None): return kernel.copy(instructions=new_insns) +# }}} + # vim: foldmethod=marker From 34fab989cb019289431bbd532fd24bf0db67b3e7 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Sun, 18 Apr 2021 15:10:43 -0500 Subject: [PATCH 288/460] fix bug in map_domain (when aligning second dim type, make sure to use result from first align) --- loopy/transform/iname.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/loopy/transform/iname.py b/loopy/transform/iname.py index eed310f29..962566433 100644 --- a/loopy/transform/iname.py +++ b/loopy/transform/iname.py @@ -1977,13 +1977,17 @@ def process_set(s): for dt in dim_types for i in range(isl_map.dim(dt)) ] + # (order doesn't matter in s_names/map_names, + # _align_dim_type just converts these to sets + # to determine which names are in both the obj and template, + # not sure why this isn't just handled inside _align_dim_type) aligned_map = _align_dim_type( dim_type.param, isl_map, map_with_s_domain, False, map_names, s_names) aligned_map = _align_dim_type( dim_type.in_, - isl_map, map_with_s_domain, False, + aligned_map, map_with_s_domain, False, map_names, s_names) # Old code """ From 18b3f185b9f234a3955f741d2e88db2b201b3633 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Sun, 18 Apr 2021 16:00:28 -0500 Subject: [PATCH 289/460] update dependencies in map_domain --- loopy/transform/iname.py | 47 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 47 insertions(+) diff --git a/loopy/transform/iname.py b/loopy/transform/iname.py index 3505403e3..0636dbc34 100644 --- a/loopy/transform/iname.py +++ 
b/loopy/transform/iname.py @@ -2140,6 +2140,53 @@ def process_set(s): new_domains = [process_set(dom) for dom in kernel.domains] + # {{{ update dependencies + + # Prep transform map to be applied to dependency + from loopy.transform.instruction import map_dependency_maps + from loopy.schedule.checker.utils import ( + insert_and_name_isl_dims, + add_eq_isl_constraint_from_names, + ) + from loopy.schedule.checker.schedule import ( + BEFORE_MARK, + STATEMENT_VAR_NAME, + ) + dt = isl.dim_type + + # Insert 'statement' dim into transform map + dep_transform_map = insert_and_name_isl_dims( + isl_map, dt.in_, [STATEMENT_VAR_NAME+BEFORE_MARK], 0) + dep_transform_map = insert_and_name_isl_dims( + dep_transform_map, dt.out, [STATEMENT_VAR_NAME], 0) + # Add stmt = stmt' constraint + dep_transform_map = add_eq_isl_constraint_from_names( + dep_transform_map, STATEMENT_VAR_NAME, STATEMENT_VAR_NAME+BEFORE_MARK) + + def _apply_transform_map_to_depender(dep_map): + # Apply transform map to dep output dims + return dep_map.apply_range(dep_transform_map) + + def _apply_transform_map_to_dependee(dep_map): + from loopy.schedule.checker.utils import ( + append_mark_to_isl_map_var_names, + ) + # Apply transform map to dep input dims (and re-insert BEFORE_MARK) + return append_mark_to_isl_map_var_names( + dep_map.apply_domain(dep_transform_map), dt.in_, BEFORE_MARK) + + # TODO figure out proper way to create false match condition + false_id_match = "id:false and (not id:false)" + #false_id_match = "not id:*" + kernel = map_dependency_maps( + kernel, _apply_transform_map_to_depender, + stmt_match_depender=within, stmt_match_dependee=false_id_match) + kernel = map_dependency_maps( + kernel, _apply_transform_map_to_dependee, + stmt_match_depender=false_id_match, stmt_match_dependee=within) + + # }}} + # {{{ update within_inames new_insns = [] From c61d59d0c56902ac9814b87a8e92520f396c0f16 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Sun, 18 Apr 2021 16:00:40 -0500 Subject: [PATCH 290/460] 
test dependency updating in map_domain --- test/test_linearization_checker.py | 40 ++++++++++++------------------ 1 file changed, 16 insertions(+), 24 deletions(-) diff --git a/test/test_linearization_checker.py b/test/test_linearization_checker.py index 631e8af71..89bba11e1 100644 --- a/test/test_linearization_checker.py +++ b/test/test_linearization_checker.py @@ -2006,7 +2006,7 @@ def test_map_domain_with_dependencies(): # {{{ make kernel - ref_knl = lp.make_kernel( + knl = lp.make_kernel( "[nx,nt] -> {[ix, it]: 1<=ix {{" @@ -2044,6 +2037,16 @@ def test_map_domain_with_dependencies(): knl = lp.add_stmt_inst_dependency( knl, stmt_after, stmt_before, dep_map) + # }}} + + # {{{ Check deps *without* map_domain transformation + + ref_knl = knl + + # Prioritize loops + knl = lp.prioritize_loops(knl, ("it", "ix")) # valid + #knl = lp.prioritize_loops(knl, ("ix", "it")) # invalid + # Get a linearization proc_knl = lp.preprocess_kernel(knl) lin_knl = lp.get_one_linearized_kernel(proc_knl) @@ -2060,11 +2063,7 @@ def test_map_domain_with_dependencies(): # {{{ Check dependency after domain change mapping - knl = deepcopy(ref_knl) # without deepcopy, deps will be added to ref_knl - - # Add dependency (TODO add dep here once map_domains updates deps correctly) - #knl = lp.add_stmt_inst_dependency( - # knl, stmt_after, stmt_before, dep_map) + knl = ref_knl # loop priority goes away, deps stay # Create map_domain mapping: transform_map = isl.BasicMap( @@ -2079,10 +2078,7 @@ def test_map_domain_with_dependencies(): # Prioritize loops (prio should eventually be updated in map_domain?) 
knl = lp.prioritize_loops(knl, "tt,tparity,tx,itt,itx") - # {{{ Manually apply transform map to dependency and add it to knl - - # NOTE: This will later occur inside map_domain when dependencies are updated - # during transformation, and this test will be updated accordingly + # {{{ Create expected dependency # Prep transform map to be applied to dependency from loopy.schedule.checker.utils import ( @@ -2105,10 +2101,6 @@ def test_map_domain_with_dependencies(): mapped_dep_map = append_mark_to_isl_map_var_names( mapped_dep_map, dt.in_, BEFORE_MARK) - # Add dep to kernel - knl = lp.add_stmt_inst_dependency( - knl, stmt_after, stmt_before, mapped_dep_map) - # }}} # Get a linearization @@ -2124,7 +2116,7 @@ def test_map_domain_with_dependencies(): assert not unsatisfied_deps - # }}} +# }}} # }}} From 34e0da567269e97f553be80615954720c59624cb Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Sun, 18 Apr 2021 16:01:55 -0500 Subject: [PATCH 291/460] change false match condition to 'not id:*' --- loopy/transform/iname.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/loopy/transform/iname.py b/loopy/transform/iname.py index 0636dbc34..7bc6fa722 100644 --- a/loopy/transform/iname.py +++ b/loopy/transform/iname.py @@ -325,7 +325,7 @@ def _split_iname_in_dependee(dep): return map_from_set # TODO figure out proper way to create false match condition - false_id_match = "id:false and (not id:false)" + false_id_match = "not id:*" kernel = map_dependency_maps( kernel, _split_iname_in_depender, stmt_match_depender=within, stmt_match_dependee=false_id_match) @@ -999,7 +999,7 @@ def _rename_iname_in_dep_in(dep): # TODO figure out proper way to match none # TODO figure out match vs stack_match - false_id_match = "id:false and (not id:false)" + false_id_match = "not id:*" kernel = map_dependency_maps( kernel, _rename_iname_in_dep_out, stmt_match_depender=within, stmt_match_dependee=false_id_match) @@ -2176,8 +2176,7 @@ def 
_apply_transform_map_to_dependee(dep_map): dep_map.apply_domain(dep_transform_map), dt.in_, BEFORE_MARK) # TODO figure out proper way to create false match condition - false_id_match = "id:false and (not id:false)" - #false_id_match = "not id:*" + false_id_match = "not id:*" kernel = map_dependency_maps( kernel, _apply_transform_map_to_depender, stmt_match_depender=within, stmt_match_dependee=false_id_match) From 137e7c7e4a4a15d5a46f2ae7adfe4f7432e0fc75 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Mon, 19 Apr 2021 17:31:28 -0500 Subject: [PATCH 292/460] fix bug in map_domain to actually allow instructions with domains independent from the transformation map, as advertised --- loopy/transform/iname.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/loopy/transform/iname.py b/loopy/transform/iname.py index 962566433..490c1622b 100644 --- a/loopy/transform/iname.py +++ b/loopy/transform/iname.py @@ -1952,7 +1952,12 @@ def process_set(s): var_dict = s.get_var_dict() overlap = old_inames & frozenset(var_dict) - if overlap and len(overlap) != len(old_inames): + + if not overlap: + # inames in s are not present in transform map, don't change s + return s + + if len(overlap) != len(old_inames): raise LoopyError("loop domain '%s' involves a part " "of the map domain inames. 
Domains must " "either involve all or none of the map domain " From 399ec46a8049d51b0776ece395ce327e7ace92bc Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Mon, 19 Apr 2021 17:34:05 -0500 Subject: [PATCH 293/460] allow post-transform iname renaming in map_domain to handle case where desired transformation map would keep an old iname around (with same name) after the map is applied (maybe handle differently later) --- loopy/transform/iname.py | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/loopy/transform/iname.py b/loopy/transform/iname.py index 490c1622b..534d553ab 100644 --- a/loopy/transform/iname.py +++ b/loopy/transform/iname.py @@ -1890,7 +1890,7 @@ def _find_aff_subst_from_map(iname, isl_map): raise LoopyError("no suitable equation for '%s' found" % iname) -def map_domain(kernel, isl_map, within=None): +def map_domain(kernel, isl_map, within=None, rename_after={}): # FIXME: Express _split_iname_backend in terms of this # Missing/deleted for now: # - slab processing @@ -2048,6 +2048,24 @@ def process_set(s): kernel = ins.map_kernel(kernel) kernel = rule_mapping_context.finish_kernel(kernel) + # {{{ Rename inames according to rename_after dict + + # This is currently an option because various isl operations fail when map dim + # names are not unique, so even if someone wants their transformation map to keep + # one of the inames unchanged, they must give it a new name + # in their map, e.g., "[x, t] -> [x_, t_outer, t_inner] : x_ = x ..." (see + # test_map_domain_vs_split_iname()). Currently, they can't + # simply exclude that iname from the transformation map because, as stated in + # the error above, all domains must either involve all or none of the + # transform map domain inames. This renaming option lets them, e.g. switch + # an iname back to its original name. 
+ + # TODO come up with better solution for this + for old_iname, new_iname in rename_after.items(): + kernel = rename_iname(kernel, old_iname, new_iname, within=within) + + # }}} + return kernel # }}} From 632aafd84d2a3568a5b98c1d931765dac6ea4337 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Mon, 19 Apr 2021 17:34:28 -0500 Subject: [PATCH 294/460] create test to compare map_domain result to split_iname result --- test/test_transform.py | 78 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 78 insertions(+) diff --git a/test/test_transform.py b/test/test_transform.py index fdba857c9..6954513c8 100644 --- a/test/test_transform.py +++ b/test/test_transform.py @@ -567,6 +567,84 @@ def test_nested_substs_in_insns(ctx_factory): lp.auto_test_vs_ref(ref_knl, ctx, knl) +# {{{ test_map_domain_vs_split_iname + +def test_map_domain_vs_split_iname(): + + # {{{ Make kernel + + knl = lp.make_kernel( + [ + "[nx,nt] -> {[x, t]: 0 <= x < nx and 0 <= t < nt}", + "[ni] -> {[i]: 0 <= i < ni}", + ], + """ + a[x,t] = b[x,t] {id=stmta} + c[x,t] = d[x,t] {id=stmtc} + e[i] = f[i] + """, + name="wave_equation", + lang_version=(2018, 2), + ) + knl = lp.add_and_infer_dtypes(knl, {"b,d,f": np.float32}) + ref_knl = knl + + # }}} + + # {{{ Apply domain change mapping + + knl_map_dom = ref_knl # loop priority goes away, deps stay + + # Create map_domain mapping: + import islpy as isl + transform_map = isl.BasicMap( + "[nx,nt] -> {[x, t] -> [x_, t_outer, t_inner]: " + "x = x_ and " + "0 <= t_inner < 32 and " + "32*t_outer + t_inner = t and " + "0 <= 32*t_outer + t_inner < nt}") + + # Call map_domain to transform kernel + knl_map_dom = lp.map_domain(knl_map_dom, transform_map, rename_after={"x_": "x"}) + + # Prioritize loops (prio should eventually be updated in map_domain?) 
+ knl_map_dom = lp.prioritize_loops(knl_map_dom, "x, t_outer, t_inner") + + # Get a linearization + proc_knl_map_dom = lp.preprocess_kernel(knl_map_dom) + lin_knl_map_dom = lp.get_one_linearized_kernel(proc_knl_map_dom) + + # }}} + + # {{{ Split iname and see if we get the same result + + knl_split_iname = ref_knl + knl_split_iname = lp.split_iname(knl_split_iname, "t", 32) + knl_split_iname = lp.prioritize_loops(knl_split_iname, "x, t_outer, t_inner") + proc_knl_split_iname = lp.preprocess_kernel(knl_split_iname) + lin_knl_split_iname = lp.get_one_linearized_kernel(proc_knl_split_iname) + + from loopy.schedule.checker.utils import ( + ensure_dim_names_match_and_align, + ) + for d_map_domain, d_split_iname in zip( + knl_map_dom.domains, knl_split_iname.domains): + d_map_domain_aligned = ensure_dim_names_match_and_align( + d_map_domain, d_split_iname) + assert d_map_domain_aligned == d_split_iname + + for litem_map_domain, litem_split_iname in zip( + lin_knl_map_dom.linearization, lin_knl_split_iname.linearization): + assert litem_map_domain == litem_split_iname + + # Can't easily compare instructions because equivalent subscript + # expressions may have different orders + + # }}} + +# }}} + + def test_diamond_tiling(ctx_factory, interactive=False): ctx = ctx_factory() queue = cl.CommandQueue(ctx) From cb8fb0b1582b748da9059551ca8bf5c73f155923 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Mon, 19 Apr 2021 20:59:24 -0500 Subject: [PATCH 295/460] add find_and_rename_dim() function --- loopy/schedule/checker/utils.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/loopy/schedule/checker/utils.py b/loopy/schedule/checker/utils.py index 364ad0c8e..b4ff9636d 100644 --- a/loopy/schedule/checker/utils.py +++ b/loopy/schedule/checker/utils.py @@ -138,6 +138,14 @@ def append_mark_to_isl_map_var_names(old_isl_map, dim_type, mark): return new_map +def find_and_rename_dim(old_map, dim_types, old_name, new_name): + new_map = old_map.copy() + for dim_type in dim_types: + 
new_map = new_map.set_dim_name( + dim_type, new_map.find_dim_by_name(dim_type, old_name), new_name) + return new_map + + def append_mark_to_strings(strings, mark): return [s+mark for s in strings] From 81001ca981938d76c32eac4a37778108e9b93f68 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Mon, 19 Apr 2021 21:01:59 -0500 Subject: [PATCH 296/460] in dependency handling within map_domain, handle cases where *only* the depender or dependee uses the domain being transformed; also handle cases where the dep dims are not aligned with the transform map --- loopy/transform/iname.py | 110 ++++++++++++++++++++++++++++++++------- 1 file changed, 90 insertions(+), 20 deletions(-) diff --git a/loopy/transform/iname.py b/loopy/transform/iname.py index fbf1b70b1..baea3ea32 100644 --- a/loopy/transform/iname.py +++ b/loopy/transform/iname.py @@ -2080,21 +2080,26 @@ def _do_not_transform_if_no_within_matches(): # }}} - def process_set(s): + def _check_overlap_condition_for_domain(s, transform_map_in_names): var_dict = s.get_var_dict() - overlap = old_inames & frozenset(var_dict) - - if not overlap: - # inames in s are not present in transform map, don't change s - return s + overlap = transform_map_in_names & frozenset(var_dict) - if len(overlap) != len(old_inames): + if overlap and len(overlap) != len(transform_map_in_names): raise LoopyError("loop domain '%s' involves a part " "of the map domain inames. Domains must " "either involve all or none of the map domain " "inames." 
% s) + return overlap + + def process_set(s): + + overlap = _check_overlap_condition_for_domain(s, old_inames) + if not overlap: + # inames in s are not present in transform map, don't change s + return s + # {{{ align dims of isl_map and s # FIXME: Make this less gross @@ -2159,26 +2164,91 @@ def process_set(s): ) dt = isl.dim_type - # Insert 'statement' dim into transform map + # Create version of transform map with before marks + # (for aligning when applying map to domains of dependees) + from loopy.schedule.checker.utils import ( + append_mark_to_isl_map_var_names, + ) + dep_transform_map_marked = append_mark_to_isl_map_var_names( + isl_map, dt.in_, BEFORE_MARK) + + # Insert 'statement' dim into transform maps (mark the 'in' statement in BOTH cases) + + # NOTE: dims must all be named correctly for the alignment to work, but dim names + # must also be unique, so the output statement var name can't match the input + # statement var name, which means in order to have the map keep the statement + # dim unchanged, (map statement_var -> statement_var), we have to change its + # name and then change it back afterward. + + # (TODO: create a function that makes it easier to apply a transform map + # (tgt.apply_domain/tgt.apply_range) when the input dims of the transform map + # are a *subset* of the domain/range of the tgt, in which case the extra dims + # remain unchanged.) 
+ + dep_transform_map_marked = insert_and_name_isl_dims( + dep_transform_map_marked, dt.in_, [STATEMENT_VAR_NAME+BEFORE_MARK], 0) + dep_transform_map_marked = insert_and_name_isl_dims( + dep_transform_map_marked, dt.out, [STATEMENT_VAR_NAME], 0) + # Add stmt = stmt' constraint + dep_transform_map_marked = add_eq_isl_constraint_from_names( + dep_transform_map_marked, STATEMENT_VAR_NAME, STATEMENT_VAR_NAME+BEFORE_MARK) + + # Temporarily rename stmt in 'out' dim for reason described above + temp_stmt_var = STATEMENT_VAR_NAME+"__" dep_transform_map = insert_and_name_isl_dims( - isl_map, dt.in_, [STATEMENT_VAR_NAME+BEFORE_MARK], 0) + isl_map, dt.in_, [STATEMENT_VAR_NAME], 0) dep_transform_map = insert_and_name_isl_dims( - dep_transform_map, dt.out, [STATEMENT_VAR_NAME], 0) - # Add stmt = stmt' constraint + dep_transform_map, dt.out, [temp_stmt_var], 0) + # Add stmt = temp_stmt_var constraint dep_transform_map = add_eq_isl_constraint_from_names( - dep_transform_map, STATEMENT_VAR_NAME, STATEMENT_VAR_NAME+BEFORE_MARK) + dep_transform_map, STATEMENT_VAR_NAME, temp_stmt_var) def _apply_transform_map_to_depender(dep_map): - # Apply transform map to dep output dims - return dep_map.apply_range(dep_transform_map) + + # Check overlap condition + overlap = _check_overlap_condition_for_domain(dep_map.range(), old_inames) + + if not overlap: + # Inames in s are not present in depender, don't change dep_map + return dep_map + else: + + # Align 'in_' dim of transform map with 'out' dim of dep + # (since 'out' dim of dep is unmarked, use unmarked dep_transform_map) + from loopy.schedule.checker.utils import reorder_dims_by_name + dep_transform_map_aligned = reorder_dims_by_name( + dep_transform_map, dt.in_, dep_map.get_var_names(dt.out)) + + # Apply transform map to dep output dims + transformed_dep_map = dep_map.apply_range(dep_transform_map_aligned) + + # Now we've renamed statement var, so fix it (assume statement dim is 0) + return transformed_dep_map.set_dim_name(dt.out, 0, 
STATEMENT_VAR_NAME) + + old_inames_marked = frozenset(old_iname+BEFORE_MARK for old_iname in old_inames) def _apply_transform_map_to_dependee(dep_map): - from loopy.schedule.checker.utils import ( - append_mark_to_isl_map_var_names, - ) - # Apply transform map to dep input dims (and re-insert BEFORE_MARK) - return append_mark_to_isl_map_var_names( - dep_map.apply_domain(dep_transform_map), dt.in_, BEFORE_MARK) + + # Check overlap condition + overlap = _check_overlap_condition_for_domain( + dep_map.domain(), old_inames_marked) + + if not overlap: + # Inames in s are not present in dependee, don't change dep_map + return dep_map + else: + + # Align 'in_' dim of transform map with 'in_' dim of dep + # (since 'in_' dim of dep is marked, use dep_transform_map_marked) + from loopy.schedule.checker.utils import reorder_dims_by_name + dep_transform_map_aligned = reorder_dims_by_name( + dep_transform_map_marked, dt.in_, dep_map.get_var_names(dt.in_)) + + # Apply transform map to dep input dims (and re-insert BEFORE_MARK) + transformed_dep_map = dep_map.apply_domain(dep_transform_map_aligned) + + # Now re-add the before marks + return append_mark_to_isl_map_var_names(transformed_dep_map, dt.in_, BEFORE_MARK) # TODO figure out proper way to create false match condition false_id_match = "not id:*" From e59e27793b4c3bca7be9c424e702c5fa31acb875 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Mon, 19 Apr 2021 21:06:14 -0500 Subject: [PATCH 297/460] add test_map_domain_with_only_partial_dep_pair_affected() to test dep handling in map_domain --- test/test_linearization_checker.py | 138 +++++++++++++++++++++++++++-- 1 file changed, 133 insertions(+), 5 deletions(-) diff --git a/test/test_linearization_checker.py b/test/test_linearization_checker.py index 89bba11e1..8f700adf4 100644 --- a/test/test_linearization_checker.py +++ b/test/test_linearization_checker.py @@ -1817,7 +1817,7 @@ def _check_deps(transformed_knl, c_dep_exp): # }}} -# {{{ def test_split_iname_with_dependencies +# 
{{{ test_split_iname_with_dependencies def test_split_iname_with_dependencies(): knl = lp.make_kernel( @@ -2000,11 +2000,137 @@ def _check_deps(transformed_knl, stmt1_dep_exp, len_unsatisfied_deps=0): # }}} -# {{{ test_map_domain_with_dependencies +# {{{ test map domain with dependencies -def test_map_domain_with_dependencies(): +# {{{ test_map_domain_with_only_partial_dep_pair_affected - # {{{ make kernel +def test_map_domain_with_only_partial_dep_pair_affected(): + + # Split an iname using map_domain, and have (misaligned) deps + # where only the dependee uses the split iname + + # {{{ Make kernel + + knl = lp.make_kernel( + [ + "[nx,nt] -> {[x, t]: 0 <= x < nx and 0 <= t < nt}", + "[ni] -> {[i]: 0 <= i < ni}", + ], + """ + a[x,t] = b[x,t] {id=stmta} + c[x,t] = d[x,t] {id=stmtc,dep=stmta} + e[i] = f[i] {id=stmte,dep=stmtc} + """, + name="wave_equation", + lang_version=(2018, 2), + ) + knl = lp.add_and_infer_dtypes(knl, {"b,d,f": np.float32}) + + # }}} + + # {{{ Add dependencies + + dep_c_on_a = _isl_map_with_marked_dims( + "[nx, nt] -> {{" + "[{0}' = 0, x', t'] -> [{0} = 1, x, t] : " + "0 <= x,x' < nx and 0 <= t,t' < nt and " + "t' <= t and x' <= x" + "}}".format(STATEMENT_VAR_NAME)) + + knl = lp.add_stmt_inst_dependency( + knl, "stmtc", "stmta", dep_c_on_a) + + # Intentionally make order of x and t different from transform_map below + # to test alignment steps in map_domain + dep_e_on_c = _isl_map_with_marked_dims( + "[nx, nt, ni] -> {{" + "[{0}' = 0, t', x'] -> [{0} = 1, i] : " + "0 <= x' < nx and 0 <= t' < nt and 0 <= i < ni" + "}}".format(STATEMENT_VAR_NAME)) + + knl = lp.add_stmt_inst_dependency( + knl, "stmte", "stmtc", dep_e_on_c) + + # }}} + + # {{{ Apply domain change mapping + + # Create map_domain mapping: + import islpy as isl + transform_map = isl.BasicMap( + "[nx,nt] -> {[x, t] -> [x_, t_outer, t_inner]: " + "x = x_ and " + "0 <= t_inner < 32 and " + "32*t_outer + t_inner = t and " + "0 <= 32*t_outer + t_inner < nt}") + + # Call map_domain to 
transform kernel + knl = lp.map_domain(knl, transform_map, rename_after={"x_": "x"}) + + # Prioritize loops (prio should eventually be updated in map_domain?) + knl = lp.prioritize_loops(knl, "x, t_outer, t_inner") + + # }}} + + # {{{ Create expected dependencies + + dep_c_on_a_exp = _isl_map_with_marked_dims( + "[nx, nt] -> {{" + "[{0}' = 0, x', t_outer', t_inner'] -> [{0} = 1, x, t_outer, t_inner] : " + "0 <= x,x' < nx and " # old bounds + "0 <= t_inner,t_inner' < 32 and " # new bounds + "0 <= 32*t_outer + t_inner < nt and " # new bounds + "0 <= 32*t_outer' + t_inner' < nt and " # new bounds + "32*t_outer' + t_inner' <= 32*t_outer + t_inner and " # new constraint t' <= t + "x' <= x" # old constraint + "}}".format(STATEMENT_VAR_NAME)) + + dep_e_on_c_exp = _isl_map_with_marked_dims( + "[nx, nt, ni] -> {{" + "[{0}' = 0, x', t_outer', t_inner'] -> [{0} = 1, i] : " + "0 <= x' < nx and 0 <= i < ni and " # old bounds + "0 <= t_inner' < 32 and " # new bounds + "0 <= 32*t_outer' + t_inner' < nt" # new bounds + "}}".format(STATEMENT_VAR_NAME)) + + # }}} + + # {{{ Make sure expected deps match found deps + + # TODO make func for comparing deps + + deps_c_found = knl.id_to_insn["stmtc"].dependencies + assert len(deps_c_found) == 1 + deps_c_on_a_found = deps_c_found["stmta"] + assert len(deps_c_on_a_found) == 1 + + deps_e_found = knl.id_to_insn["stmte"].dependencies + assert len(deps_e_found) == 1 + deps_e_on_c_found = deps_e_found["stmtc"] + assert len(deps_e_on_c_found) == 1 + + _align_and_compare_maps([ + (dep_c_on_a_exp, deps_c_on_a_found[0]), + (dep_e_on_c_exp, deps_e_on_c_found[0]), + ]) + + # }}} + + # Make sure deps are satisfied + proc_knl = lp.preprocess_kernel(knl) + lin_knl = lp.get_one_linearized_kernel(proc_knl) + unsatisfied_deps = lp.find_unsatisfied_dependencies( + proc_knl, lin_knl.linearization) + assert not unsatisfied_deps + + # }}} + + +# {{{ test_map_domain_with_stencil_dependencies + +def test_map_domain_with_stencil_dependencies(): + + # {{{ Make 
kernel knl = lp.make_kernel( "[nx,nt] -> {[ix, it]: 1<=ix {{" @@ -2116,6 +2242,8 @@ def test_map_domain_with_dependencies(): assert not unsatisfied_deps + # }}} + # }}} # }}} From 6fe250d7ed56c01d16c36457acc08c9604ce9c9a Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Mon, 19 Apr 2021 22:34:21 -0500 Subject: [PATCH 298/460] make helper function _compare_dependencies() to make testing dep handling easier --- test/test_linearization_checker.py | 377 +++++++++++++++-------------- 1 file changed, 189 insertions(+), 188 deletions(-) diff --git a/test/test_linearization_checker.py b/test/test_linearization_checker.py index 8f700adf4..54970ea75 100644 --- a/test/test_linearization_checker.py +++ b/test/test_linearization_checker.py @@ -1342,6 +1342,45 @@ def test_sios_with_matmul(): # {{{ Dependency tests +# {{{ Helper functions + + +def _compare_dependencies(knl, deps_expected, return_unsatisfied=False): + + deps_found = {} + for stmt in knl.instructions: + if hasattr(stmt, "dependencies") and stmt.dependencies: + deps_found[stmt.id] = stmt.dependencies + + assert deps_found.keys() == deps_expected.keys() + + for stmt_id_after, dep_dict_found in deps_found.items(): + + dep_dict_expected = deps_expected[stmt_id_after] + + # Ensure deps for stmt_id_after match + assert dep_dict_found.keys() == dep_dict_expected.keys() + + for stmt_id_before, dep_list_found in dep_dict_found.items(): + + # Ensure deps from (stmt_id_before -> stmt_id_after) match + dep_list_expected = dep_dict_expected[stmt_id_before] + assert len(dep_list_found) == len(dep_list_expected) + _align_and_compare_maps(zip(dep_list_found, dep_list_expected)) + + if not return_unsatisfied: + return + + # Get unsatisfied deps + proc_knl = preprocess_kernel(knl) + lin_knl = get_one_linearized_kernel(proc_knl) + lin_items = lin_knl.linearization + return lp.find_unsatisfied_dependencies(proc_knl, lin_items) + + +# }}} + + # {{{ Dependency creation and checking (without transformations) # {{{ 
test_add_stmt_inst_dependency @@ -1381,13 +1420,10 @@ def test_add_stmt_inst_dependency(): knl = lp.add_stmt_inst_dependency(knl, "stmt_b", "stmt_a", dep_b_on_a) - for stmt in knl.instructions: - if stmt.id == "stmt_b": - assert stmt.dependencies == { - "stmt_a": [dep_b_on_a, ], - } - else: - assert not stmt.dependencies + _compare_dependencies( + knl, + {"stmt_b": { + "stmt_a": [dep_b_on_a, ]}}) # Add a second dependency to stmt_b dep_b_on_a_2 = _isl_map_with_marked_dims( @@ -1401,13 +1437,10 @@ def test_add_stmt_inst_dependency(): knl = lp.add_stmt_inst_dependency(knl, "stmt_b", "stmt_a", dep_b_on_a_2) - for stmt in knl.instructions: - if stmt.id == "stmt_b": - assert stmt.dependencies == { - "stmt_a": [dep_b_on_a, dep_b_on_a_2], - } - else: - assert not stmt.dependencies + _compare_dependencies( + knl, + {"stmt_b": { + "stmt_a": [dep_b_on_a, dep_b_on_a_2]}}) # Add dependencies to stmt_c @@ -1431,30 +1464,20 @@ def test_add_stmt_inst_dependency(): knl = lp.add_stmt_inst_dependency(knl, "stmt_c", "stmt_a", dep_c_on_a) knl = lp.add_stmt_inst_dependency(knl, "stmt_c", "stmt_b", dep_c_on_b) - for stmt in knl.instructions: - if stmt.id == "stmt_b": - assert stmt.dependencies == { - "stmt_a": [dep_b_on_a, dep_b_on_a_2], - } - elif stmt.id == "stmt_c": - assert stmt.dependencies == { - "stmt_a": [dep_c_on_a, ], - "stmt_b": [dep_c_on_b, ], - } - else: - assert not stmt.dependencies - - # Now make sure deps are satisfied - proc_knl = preprocess_kernel(knl) - lin_knl = get_one_linearized_kernel(proc_knl) - lin_items = lin_knl.linearization - - unsatisfied_deps = lp.find_unsatisfied_dependencies( - proc_knl, lin_items) + # Compare deps and make sure they are satisfied + unsatisfied_deps = _compare_dependencies( + knl, + { + "stmt_b": { + "stmt_a": [dep_b_on_a, dep_b_on_a_2]}, + "stmt_c": { + "stmt_a": [dep_c_on_a, ], "stmt_b": [dep_c_on_b, ]}, + }, + return_unsatisfied=True) assert not unsatisfied_deps -# }}} + # }}} # {{{ test_new_dependencies_finite_diff @@ -1490,15 
+1513,12 @@ def test_new_dependencies_finite_diff(): # Prioritize loops correctly knl = lp.prioritize_loops(knl, "t,x") - # Make sure deps are satisfied - proc_knl = preprocess_kernel(knl) - lin_knl = get_one_linearized_kernel(proc_knl) - lin_items = lin_knl.linearization - - unsatisfied_deps = lp.find_unsatisfied_dependencies( - proc_knl, lin_items) + # Compare deps and make sure they are satisfied + unsatisfied_deps = _compare_dependencies( + knl, + {"stmt": {"stmt": [dep, ]}, }, + return_unsatisfied=True) - print(lp.generate_code_v2(lin_knl).device_code()) assert not unsatisfied_deps # }}} @@ -1508,15 +1528,12 @@ def test_new_dependencies_finite_diff(): knl = ref_knl knl = lp.prioritize_loops(knl, "x,t") - # Make sure unsatisfied deps are caught - proc_knl = preprocess_kernel(knl) - lin_knl = get_one_linearized_kernel(proc_knl) - lin_items = lin_knl.linearization - - unsatisfied_deps = lp.find_unsatisfied_dependencies( - proc_knl, lin_items) + # Compare deps and make sure unsatisfied deps are caught + unsatisfied_deps = _compare_dependencies( + knl, + {"stmt": {"stmt": [dep, ]}, }, + return_unsatisfied=True) - print(lp.generate_code_v2(lin_knl).device_code()) assert len(unsatisfied_deps) == 1 # }}} @@ -1560,14 +1577,16 @@ def test_new_dependencies_finite_diff(): knl = lp.add_dtypes( knl, {"u": np.float32, "dx": np.float32, "dt": np.float32}) - # Make sure deps are satisfied - proc_knl = preprocess_kernel(knl) - lin_knl = get_one_linearized_kernel(proc_knl) - lin_items = lin_knl.linearization - print(lp.generate_code_v2(lin_knl).device_code()) + knl = lp.add_stmt_inst_dependency(knl, "stmt", "stmt", dep) - unsatisfied_deps = lp.find_unsatisfied_dependencies( - proc_knl, lin_items) + knl = lp.prioritize_loops(knl, "t,x") + knl = lp.tag_inames(knl, "x:l.0") + + # Compare deps and make sure they are satisfied + unsatisfied_deps = _compare_dependencies( + knl, + {"stmt": {"stmt": [dep, ]}, }, + return_unsatisfied=True) assert not unsatisfied_deps @@ -1617,14 +1636,16 
@@ def test_fix_parameters_with_dependencies(): "and i' = i and j' = j" "}}".format(STATEMENT_VAR_NAME, fix_val)) - for stmt_id, dep_id in [("stmt1", "stmt0"), ("stmt2", "stmt1")]: - deps_found = knl.id_to_insn[stmt_id].dependencies - - assert set(deps_found.keys()) == set([dep_id]) - assert len(deps_found[dep_id]) == 1 + # Compare deps and make sure they are satisfied + unsatisfied_deps = _compare_dependencies( + knl, + { + "stmt1": {"stmt0": [dep_exp, ]}, + "stmt2": {"stmt1": [dep_exp, ]}, + }, + return_unsatisfied=True) - # Check dep - _align_and_compare_maps([(dep_exp, deps_found[dep_id][0])]) + assert not unsatisfied_deps # }}} @@ -1665,17 +1686,19 @@ def test_assignment_to_subst_with_dependencies(): knl = lp.assignment_to_subst(knl, "tsq") - for stmt_id in ["stmt2", "stmt3"]: - deps_found = knl.id_to_insn[stmt_id].dependencies - - # Dep on stmt1 should have been removed - assert set(deps_found.keys()) == set(["stmt0"]) - assert len(deps_found["stmt0"]) == 1 - - # Should now depend on stmt0 - _align_and_compare_maps([(dep_le, deps_found["stmt0"][0])]) + # Compare deps and make sure they are satisfied + unsatisfied_deps = _compare_dependencies( + knl, + { + "stmt2": {"stmt0": [dep_le, ]}, + "stmt3": {"stmt0": [dep_le, ]}, + }, + return_unsatisfied=True) + # (stmt4 dep was removed because dependee was removed, but dependee's + # deps were not added to stmt4 because the substitution was not made + # in stmt4) TODO this behavior will change when we propagate deps properly - assert not knl.id_to_insn["stmt4"].dependencies + assert not unsatisfied_deps # Test using 'within' -------------------------------------------------- @@ -1700,34 +1723,26 @@ def test_assignment_to_subst_with_dependencies(): knl = lp.assignment_to_subst(knl, "tsq", within="id:stmt2 or id:stmt3") - # replacement will not be made in stmt5, so stmt1 will not be removed, - # which means no deps will be removed, and the statements were the replacement + # Replacement will not be made in stmt5, so 
stmt1 will not be removed, + # which means no deps will be removed, and the statements where the replacement # *was* made (stmt2 and stmt3) will still receive the deps from stmt1 + # TODO this behavior may change when we propagate deps properly - for stmt_id in ["stmt2", "stmt3"]: - deps_found = knl.id_to_insn[stmt_id].dependencies - - # Dep on stmt1 should NOT have been removed - # (for now? could maybe do something smarter) - assert set(deps_found.keys()) == set(["stmt0", "stmt1"]) - assert len(deps_found["stmt0"]) == len(deps_found["stmt1"]) == 1 - - # Should now depend on stmt0 - _align_and_compare_maps([(dep_le, deps_found["stmt0"][0])]) - - # Should still depend on stmt1 - _align_and_compare_maps([(dep_eq, deps_found["stmt1"][0])]) - - for stmt_id in ["stmt4", "stmt5"]: - deps_found = knl.id_to_insn[stmt_id].dependencies - - # Dep on stmt1 should NOT have been removed - # (for now? could maybe do something smarter) - assert set(deps_found.keys()) == set(["stmt1"]) - assert len(deps_found["stmt1"]) == 1 + # Compare deps and make sure they are satisfied + unsatisfied_deps = _compare_dependencies( + knl, + { + "stmt1": {"stmt0": [dep_le, ]}, + "stmt2": { + "stmt0": [dep_le, ], "stmt1": [dep_eq, ]}, + "stmt3": { + "stmt0": [dep_le, ], "stmt1": [dep_eq, ]}, + "stmt4": {"stmt1": [dep_eq, ]}, + "stmt5": {"stmt1": [dep_eq, ]}, + }, + return_unsatisfied=True) - # Should still depend on stmt1 - _align_and_compare_maps([(dep_eq, deps_found["stmt1"][0])]) + assert not unsatisfied_deps # }}} @@ -1754,24 +1769,6 @@ def test_duplicate_inames_with_dependencies(): ref_knl = knl - def _check_deps(transformed_knl, c_dep_exp): - b_deps = transformed_knl.id_to_insn["stmtb"].dependencies - c_deps = transformed_knl.id_to_insn["stmtc"].dependencies - - assert not b_deps - assert len(c_deps) == 1 - assert len(c_deps["stmtb"]) == 1 - _align_and_compare_maps([(c_deps["stmtb"][0], c_dep_exp)]) - - # Check dep satisfaction - proc_knl = preprocess_kernel(transformed_knl) - lin_knl = 
get_one_linearized_kernel(proc_knl) - lin_items = lin_knl.linearization - unsatisfied_deps = lp.find_unsatisfied_dependencies( - proc_knl, lin_items) - - assert not unsatisfied_deps - # {{{ Duplicate j within stmtc knl = lp.duplicate_inames(knl, ["j"], within="id:stmtc", new_inames=["j_new"]) @@ -1781,7 +1778,13 @@ def _check_deps(transformed_knl, c_dep_exp): "0 <= i,i',j_new,j' < n and i' = i and j' = j_new" "}}".format(STATEMENT_VAR_NAME)) - _check_deps(knl, dep_exp) + # Compare deps and make sure they are satisfied + unsatisfied_deps = _compare_dependencies( + knl, + {"stmtc": {"stmtb": [dep_exp, ]}}, + return_unsatisfied=True) + + assert not unsatisfied_deps # }}} @@ -1795,7 +1798,13 @@ def _check_deps(transformed_knl, c_dep_exp): "0 <= i,i',j,j_new' < n and i' = i and j_new' = j" "}}".format(STATEMENT_VAR_NAME)) - _check_deps(knl, dep_exp) + # Compare deps and make sure they are satisfied + unsatisfied_deps = _compare_dependencies( + knl, + {"stmtc": {"stmtb": [dep_exp, ]}}, + return_unsatisfied=True) + + assert not unsatisfied_deps # }}} @@ -1810,7 +1819,13 @@ def _check_deps(transformed_knl, c_dep_exp): "0 <= i,i',j_new,j_new' < n and i' = i and j_new' = j_new" "}}".format(STATEMENT_VAR_NAME)) - _check_deps(knl, dep_exp) + # Compare deps and make sure they are satisfied + unsatisfied_deps = _compare_dependencies( + knl, + {"stmtc": {"stmtb": [dep_exp, ]}}, + return_unsatisfied=True) + + assert not unsatisfied_deps # }}} @@ -1834,24 +1849,6 @@ def test_split_iname_with_dependencies(): from copy import deepcopy ref_knl = deepcopy(knl) # without deepcopy, deps get applied to ref_knl - def _check_deps(transformed_knl, stmt1_dep_exp, len_unsatisfied_deps=0): - stmt0_deps = transformed_knl.id_to_insn["stmt0"].dependencies - stmt1_deps = transformed_knl.id_to_insn["stmt1"].dependencies - - assert not stmt0_deps - assert len(stmt1_deps) == 1 - assert len(stmt1_deps["stmt0"]) == 1 - _align_and_compare_maps([(stmt1_deps["stmt0"][0], stmt1_dep_exp)]) - - # Check dep 
satisfaction - proc_knl = preprocess_kernel(transformed_knl) - lin_knl = get_one_linearized_kernel(proc_knl) - lin_items = lin_knl.linearization - unsatisfied_deps = lp.find_unsatisfied_dependencies( - proc_knl, lin_items) - - assert len(unsatisfied_deps) == len_unsatisfied_deps - # {{{ Split iname and make sure dep is correct dep_inout_space_str = "[{0}'=0, i'] -> [{0}=1, i]".format(STATEMENT_VAR_NAME) @@ -1870,7 +1867,13 @@ def _check_deps(transformed_knl, stmt1_dep_exp, len_unsatisfied_deps=0): " and i_inner + 32*i_outer = 32*i_outer' + i_inner'" # i = i' "}}".format(STATEMENT_VAR_NAME)) - _check_deps(knl, dep_exp) + # Compare deps and make sure they are satisfied + unsatisfied_deps = _compare_dependencies( + knl, + {"stmt1": {"stmt0": [dep_exp, ]}}, + return_unsatisfied=True) + + assert not unsatisfied_deps # }}} @@ -1889,7 +1892,13 @@ def _check_deps(transformed_knl, stmt1_dep_exp, len_unsatisfied_deps=0): " and i_inner + 32*i_outer = i'" # transform {i = i'} "}}".format(STATEMENT_VAR_NAME)) - _check_deps(knl, dep_exp) + # Compare deps and make sure they are satisfied + unsatisfied_deps = _compare_dependencies( + knl, + {"stmt1": {"stmt0": [dep_exp, ]}}, + return_unsatisfied=True) + + assert not unsatisfied_deps # }}} @@ -1908,7 +1917,13 @@ def _check_deps(transformed_knl, stmt1_dep_exp, len_unsatisfied_deps=0): " and i = 32*i_outer' + i_inner'" # transform {i = i'} "}}".format(STATEMENT_VAR_NAME)) - _check_deps(knl, dep_exp) + # Compare deps and make sure they are satisfied + unsatisfied_deps = _compare_dependencies( + knl, + {"stmt1": {"stmt0": [dep_exp, ]}}, + return_unsatisfied=True) + + assert not unsatisfied_deps # }}} @@ -1931,7 +1946,13 @@ def _check_deps(transformed_knl, stmt1_dep_exp, len_unsatisfied_deps=0): " and i_inner + 32*i_outer + 1 = 32*i_outer' + i_inner'" # i' = i + 1 "}}".format(STATEMENT_VAR_NAME)) - _check_deps(knl, dep_exp, len_unsatisfied_deps=1) + # Compare deps and make sure they are satisfied + unsatisfied_deps = 
_compare_dependencies( + knl, + {"stmt1": {"stmt0": [dep_exp, ]}}, + return_unsatisfied=True) + + assert len(unsatisfied_deps) == 1 # }}} @@ -2081,8 +2102,8 @@ def test_map_domain_with_only_partial_dep_pair_affected(): "0 <= t_inner,t_inner' < 32 and " # new bounds "0 <= 32*t_outer + t_inner < nt and " # new bounds "0 <= 32*t_outer' + t_inner' < nt and " # new bounds - "32*t_outer' + t_inner' <= 32*t_outer + t_inner and " # new constraint t' <= t - "x' <= x" # old constraint + "32*t_outer' + t_inner' <= 32*t_outer + t_inner and " # new constraint t'<=t + "x' <= x" # old constraint "}}".format(STATEMENT_VAR_NAME)) dep_e_on_c_exp = _isl_map_with_marked_dims( @@ -2095,36 +2116,25 @@ def test_map_domain_with_only_partial_dep_pair_affected(): # }}} - # {{{ Make sure expected deps match found deps - - # TODO make func for comparing deps - - deps_c_found = knl.id_to_insn["stmtc"].dependencies - assert len(deps_c_found) == 1 - deps_c_on_a_found = deps_c_found["stmta"] - assert len(deps_c_on_a_found) == 1 + # {{{ Make sure deps are correct and satisfied - deps_e_found = knl.id_to_insn["stmte"].dependencies - assert len(deps_e_found) == 1 - deps_e_on_c_found = deps_e_found["stmtc"] - assert len(deps_e_on_c_found) == 1 - - _align_and_compare_maps([ - (dep_c_on_a_exp, deps_c_on_a_found[0]), - (dep_e_on_c_exp, deps_e_on_c_found[0]), - ]) - - # }}} + # Compare deps and make sure they are satisfied + unsatisfied_deps = _compare_dependencies( + knl, + { + "stmtc": { + "stmta": [dep_c_on_a_exp, ]}, + "stmte": { + "stmtc": [dep_e_on_c_exp, ]}, + }, + return_unsatisfied=True) - # Make sure deps are satisfied - proc_knl = lp.preprocess_kernel(knl) - lin_knl = lp.get_one_linearized_kernel(proc_knl) - unsatisfied_deps = lp.find_unsatisfied_dependencies( - proc_knl, lin_knl.linearization) assert not unsatisfied_deps # }}} +# }}} + # {{{ test_map_domain_with_stencil_dependencies @@ -2173,16 +2183,12 @@ def test_map_domain_with_stencil_dependencies(): knl = lp.prioritize_loops(knl, ("it", 
"ix")) # valid #knl = lp.prioritize_loops(knl, ("ix", "it")) # invalid - # Get a linearization - proc_knl = lp.preprocess_kernel(knl) - lin_knl = lp.get_one_linearized_kernel(proc_knl) + # Compare deps and make sure they are satisfied + unsatisfied_deps = _compare_dependencies( + knl, + {"stmt": {"stmt": [dep_map, ]}}, + return_unsatisfied=True) - # Check dependencies - dep_found = proc_knl.id_to_insn[stmt_after].dependencies[stmt_before][0] - assert dep_found.get_var_dict() == dep_map.get_var_dict() - assert dep_found == dep_map - unsatisfied_deps = lp.find_unsatisfied_dependencies( - proc_knl, lin_knl.linearization) assert not unsatisfied_deps # }}} @@ -2229,16 +2235,11 @@ def test_map_domain_with_stencil_dependencies(): # }}} - # Get a linearization - proc_knl = lp.preprocess_kernel(knl) - lin_knl = lp.get_one_linearized_kernel(proc_knl) - - # Check dependencies - dep_found = proc_knl.id_to_insn[stmt_after].dependencies[stmt_before][0] - assert dep_found.get_var_dict() == mapped_dep_map.get_var_dict() - assert dep_found == mapped_dep_map - unsatisfied_deps = lp.find_unsatisfied_dependencies( - proc_knl, lin_knl.linearization) + # Compare deps and make sure they are satisfied + unsatisfied_deps = _compare_dependencies( + knl, + {"stmt": {"stmt": [mapped_dep_map, ]}}, + return_unsatisfied=True) assert not unsatisfied_deps From 0166f8d1b5df9a48e722f19259cc4f2b0c32ca1d Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Mon, 19 Apr 2021 22:37:09 -0500 Subject: [PATCH 299/460] slight clarification of comment --- loopy/transform/iname.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/loopy/transform/iname.py b/loopy/transform/iname.py index 534d553ab..c594b0e6a 100644 --- a/loopy/transform/iname.py +++ b/loopy/transform/iname.py @@ -2050,7 +2050,7 @@ def process_set(s): # {{{ Rename inames according to rename_after dict - # This is currently an option because various isl operations fail when map dim + # This renaming option exists because various isl 
operations fail when map dim # names are not unique, so even if someone wants their transformation map to keep # one of the inames unchanged, they must give it a new name # in their map, e.g., "[x, t] -> [x_, t_outer, t_inner] : x_ = x ..." (see From 9c0c2dfe48aea7356af718251d5b1bf109b6378b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andreas=20Kl=C3=B6ckner?= Date: Tue, 20 Apr 2021 12:16:59 -0500 Subject: [PATCH 300/460] Add map_domain FIXME --- loopy/transform/iname.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/loopy/transform/iname.py b/loopy/transform/iname.py index c594b0e6a..0c768759c 100644 --- a/loopy/transform/iname.py +++ b/loopy/transform/iname.py @@ -1900,6 +1900,8 @@ def map_domain(kernel, isl_map, within=None, rename_after={}): # FIXME: Document # FIXME: Support within + # FIXME: Right now, this requires all inames in a domain (or none) to + # be mapped. That makes this awkward to use. # {{{ within processing (disabled for now) if within is not None: From ab983950e8c8b8e4ae6e7f3d531471de1e2897d4 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Fri, 23 Apr 2021 17:38:41 -0500 Subject: [PATCH 301/460] fix flake8 issues --- loopy/transform/iname.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/loopy/transform/iname.py b/loopy/transform/iname.py index e9b92d62e..29a27f074 100644 --- a/loopy/transform/iname.py +++ b/loopy/transform/iname.py @@ -2049,13 +2049,13 @@ def process_set(s): # {{{ Rename inames according to rename_after dict - # This renaming option exists because various isl operations fail when map dim - # names are not unique, so even if someone wants their transformation map to keep - # one of the inames unchanged, they must give it a new name + # This renaming option exists because various isl operations fail when map + # dim names are not unique, so even if someone wants their transformation + # map to keep one of the inames unchanged, they must give it a new name # in their map, e.g., "[x, t] -> [x_, 
t_outer, t_inner] : x_ = x ..." (see # test_map_domain_vs_split_iname()). Currently, they can't - # simply exclude that iname from the transformation map because, as stated in - # the error above, all domains must either involve all or none of the + # simply exclude that iname from the transformation map because, as stated + # in the error above, all domains must either involve all or none of the # transform map domain inames. This renaming option lets them, e.g. switch # an iname back to its original name. From 6354191d34658053a0c15509a657cb37d9609564 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Fri, 23 Apr 2021 17:41:18 -0500 Subject: [PATCH 302/460] fix flake8 issues --- loopy/transform/iname.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/loopy/transform/iname.py b/loopy/transform/iname.py index 9884d25e3..8758284ef 100644 --- a/loopy/transform/iname.py +++ b/loopy/transform/iname.py @@ -2171,7 +2171,8 @@ def process_set(s): dep_transform_map_marked = append_mark_to_isl_map_var_names( isl_map, dt.in_, BEFORE_MARK) - # Insert 'statement' dim into transform maps (mark the 'in' statement in BOTH cases) + # Insert 'statement' dim into transform maps + # (mark the 'in' statement in BOTH cases) # NOTE: dims must all be named correctly for the alignment to work, but dim names # must also be unique, so the output statement var name can't match the input @@ -2187,7 +2188,7 @@ def process_set(s): dep_transform_map_marked = insert_and_name_isl_dims( dep_transform_map_marked, dt.in_, [STATEMENT_VAR_NAME+BEFORE_MARK], 0) dep_transform_map_marked = insert_and_name_isl_dims( - dep_transform_map_marked, dt.out, [STATEMENT_VAR_NAME], 0) + dep_transform_map_marked, dt.out, [STATEMENT_VAR_NAME], 0) # Add stmt = stmt' constraint dep_transform_map_marked = add_eq_isl_constraint_from_names( dep_transform_map_marked, STATEMENT_VAR_NAME, STATEMENT_VAR_NAME+BEFORE_MARK) @@ -2197,7 +2198,7 @@ def process_set(s): dep_transform_map = 
insert_and_name_isl_dims( isl_map, dt.in_, [STATEMENT_VAR_NAME], 0) dep_transform_map = insert_and_name_isl_dims( - dep_transform_map, dt.out, [temp_stmt_var], 0) + dep_transform_map, dt.out, [temp_stmt_var], 0) # Add stmt = temp_stmt_var constraint dep_transform_map = add_eq_isl_constraint_from_names( dep_transform_map, STATEMENT_VAR_NAME, temp_stmt_var) @@ -2247,7 +2248,8 @@ def _apply_transform_map_to_dependee(dep_map): transformed_dep_map = dep_map.apply_domain(dep_transform_map_aligned) # Now re-add the before marks - return append_mark_to_isl_map_var_names(transformed_dep_map, dt.in_, BEFORE_MARK) + return append_mark_to_isl_map_var_names( + transformed_dep_map, dt.in_, BEFORE_MARK) # TODO figure out proper way to create false match condition false_id_match = "not id:*" From bc1c27b8f8ed73a7fcd0b1fc4669f9d4ad1237bc Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Fri, 23 Apr 2021 17:58:04 -0500 Subject: [PATCH 303/460] rename add_stmt_inst_dependency->add_dependency_v2 --- loopy/__init__.py | 4 ++-- loopy/transform/instruction.py | 4 ++-- test/test_linearization_checker.py | 14 +++++++------- 3 files changed, 11 insertions(+), 11 deletions(-) diff --git a/loopy/__init__.py b/loopy/__init__.py index f94cbe021..36196206b 100644 --- a/loopy/__init__.py +++ b/loopy/__init__.py @@ -79,7 +79,7 @@ from loopy.transform.instruction import ( find_instructions, map_instructions, set_instruction_priority, - add_dependency, add_stmt_inst_dependency, + add_dependency, add_dependency_v2, remove_instructions, replace_instruction_ids, tag_instructions, @@ -206,7 +206,7 @@ "find_instructions", "map_instructions", "set_instruction_priority", - "add_dependency", "add_stmt_inst_dependency", + "add_dependency", "add_dependency_v2", "remove_instructions", "replace_instruction_ids", "tag_instructions", diff --git a/loopy/transform/instruction.py b/loopy/transform/instruction.py index 73bc9ba13..8b539ca5f 100644 --- a/loopy/transform/instruction.py +++ b/loopy/transform/instruction.py 
@@ -117,9 +117,9 @@ def add_dep(insn): # }}} -# {{{ add_stmt_inst_dependency +# {{{ add_dependency_v2 -def add_stmt_inst_dependency( +def add_dependency_v2( kernel, stmt_id, depends_on_id, new_dependency): """Add the statement instance dependency `new_dependency` to the statement with id `stmt_id`. diff --git a/test/test_linearization_checker.py b/test/test_linearization_checker.py index 3ed214bf6..3192688ec 100644 --- a/test/test_linearization_checker.py +++ b/test/test_linearization_checker.py @@ -1342,9 +1342,9 @@ def test_sios_with_matmul(): # {{{ Dependency tests -# {{{ test_add_stmt_inst_dependency +# {{{ test_add_dependency_v2 -def test_add_stmt_inst_dependency(): +def test_add_dependency_v2(): # Make kernel and use OLD deps to control linearization order for now i_range_str = "0 <= i < pi" @@ -1377,7 +1377,7 @@ def test_add_stmt_inst_dependency(): assumptions_str, )) - knl = lp.add_stmt_inst_dependency(knl, "stmt_b", "stmt_a", dep_b_on_a) + knl = lp.add_dependency_v2(knl, "stmt_b", "stmt_a", dep_b_on_a) for stmt in knl.instructions: if stmt.id == "stmt_b": @@ -1397,7 +1397,7 @@ def test_add_stmt_inst_dependency(): assumptions_str, )) - knl = lp.add_stmt_inst_dependency(knl, "stmt_b", "stmt_a", dep_b_on_a_2) + knl = lp.add_dependency_v2(knl, "stmt_b", "stmt_a", dep_b_on_a_2) for stmt in knl.instructions: if stmt.id == "stmt_b": @@ -1426,8 +1426,8 @@ def test_add_stmt_inst_dependency(): assumptions_str, )) - knl = lp.add_stmt_inst_dependency(knl, "stmt_c", "stmt_a", dep_c_on_a) - knl = lp.add_stmt_inst_dependency(knl, "stmt_c", "stmt_b", dep_c_on_b) + knl = lp.add_dependency_v2(knl, "stmt_c", "stmt_a", dep_c_on_a) + knl = lp.add_dependency_v2(knl, "stmt_c", "stmt_b", dep_c_on_b) for stmt in knl.instructions: if stmt.id == "stmt_b": @@ -1479,7 +1479,7 @@ def test_new_dependencies_finite_diff(): xt_range_str, xt_range_str_p, )) - knl = lp.add_stmt_inst_dependency(knl, "stmt", "stmt", dep) + knl = lp.add_dependency_v2(knl, "stmt", "stmt", dep) ref_knl = knl From 
f69cae9e6c18ddcbc79d3acd7b641232775337f0 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Fri, 23 Apr 2021 20:40:43 -0500 Subject: [PATCH 304/460] create convert_set_back_to_map() --- loopy/schedule/checker/utils.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/loopy/schedule/checker/utils.py b/loopy/schedule/checker/utils.py index b4ff9636d..f077b875c 100644 --- a/loopy/schedule/checker/utils.py +++ b/loopy/schedule/checker/utils.py @@ -164,6 +164,7 @@ def sorted_union_of_names_in_isl_sets( def convert_map_to_set(isl_map): + # also works for spaces n_in_dims = len(isl_map.get_var_names(dt.in_)) n_out_dims = len(isl_map.get_var_names(dt.out)) return isl_map.move_dims( @@ -171,6 +172,11 @@ def convert_map_to_set(isl_map): ).domain(), n_in_dims, n_out_dims +def convert_set_back_to_map(isl_set, n_old_in_dims, n_old_out_dims): + return isl.Map.from_domain( + isl_set).move_dims(dt.out, 0, dt.in_, n_old_in_dims, n_old_out_dims) + + def create_symbolic_map_from_tuples( tuple_pairs_with_domains, space, From 94c374efba3023f3c3f9531832309f8287c4fd48 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Fri, 23 Apr 2021 20:42:14 -0500 Subject: [PATCH 305/460] create dependency.py; create function filter_deps_by_intersection_with_SAME() based on simplification of work from old linearization checker branch(es) --- loopy/schedule/checker/dependency.py | 122 +++++++++++++++++++++++++++ 1 file changed, 122 insertions(+) create mode 100644 loopy/schedule/checker/dependency.py diff --git a/loopy/schedule/checker/dependency.py b/loopy/schedule/checker/dependency.py new file mode 100644 index 000000000..a0b26c109 --- /dev/null +++ b/loopy/schedule/checker/dependency.py @@ -0,0 +1,122 @@ +__copyright__ = "Copyright (C) 2019 James Stevens" + +__license__ = """ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the 
rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +""" + +import islpy as isl + + +class DependencyType: + """Strings specifying a particular type of dependency relationship. + + .. attribute:: SAME + + A :class:`str` specifying the following dependency relationship: + + If ``S = {i, j, ...}`` is a set of inames used in both statements + ``insn0`` and ``insn1``, and ``{i', j', ...}`` represent the values + of the inames in ``insn0``, and ``{i, j, ...}`` represent the + values of the inames in ``insn1``, then the dependency + ``insn0 happens before insn1 iff SAME({i, j})`` specifies that + ``insn0 happens before insn1 iff {i' = i and j' = j and ...}``. + Note that ``SAME({}) = True``. + + .. attribute:: PRIOR + + A :class:`str` specifying the following dependency relationship: + + If ``S = {i, j, k, ...}`` is a set of inames used in both statements + ``insn0`` and ``insn1``, and ``{i', j', k', ...}`` represent the values + of the inames in ``insn0``, and ``{i, j, k, ...}`` represent the + values of the inames in ``insn1``, then the dependency + ``insn0 happens before insn1 iff PRIOR({i, j, k})`` specifies one of + two possibilities, depending on whether the loop nest ordering is + known. 
If the loop nest ordering is unknown, then + ``insn0 happens before insn1 iff {i' < i and j' < j and k' < k ...}``. + If the loop nest ordering is known, the condition becomes + ``{i', j', k', ...}`` is lexicographically less than ``{i, j, k, ...}``, + i.e., ``i' < i or (i' = i and j' < j) or (i' = i and j' = j and k' < k) ...``. + + """ + + SAME = "same" + PRIOR = "prior" + + +def filter_deps_by_intersection_with_SAME(knl): + # Determine which dep relations have a non-empty intersection with + # the SAME relation + # TODO document + + from loopy.schedule.checker.utils import ( + append_mark_to_strings, + partition_inames_by_concurrency, + create_elementwise_comparison_conjunction_set, + convert_map_to_set, + convert_set_back_to_map, + ) + from loopy.schedule.checker.schedule import ( + BEFORE_MARK, + ) + _, non_conc_inames = partition_inames_by_concurrency(knl) + + deps_filtered = set() + for stmt in knl.instructions: + if hasattr(stmt, 'dependencies') and stmt.dependencies: + depender_id = stmt.id + for dependee_id, dep_maps in stmt.dependencies: + # Continue if we already have this pair + if (dependee_id, depender_id) in deps_filtered: + continue + for dep_map in dep_maps: + # Create isl map representing "SAME" dep for these two insns + + # Get shared nonconcurrent inames + depender_inames = knl.id_to_insn[depender_id].within_inames + dependee_inames = knl.id_to_insn[dependee_id].within_inames + shared_nc_inames = ( + depender_inames & dependee_inames & non_conc_inames) + + # Temporarily convert to set + dep_set_space, n_in_dims, n_out_dims = convert_map_to_set( + dep_map.space) + + # Create SAME relation + same_set_affs = isl.affs_from_space(dep_set_space) + same_set = create_elementwise_comparison_conjunction_set( + shared_nc_inames, + append_mark_to_strings(shared_nc_inames, BEFORE_MARK), + same_set_affs) + + # Convert back to map + same_map = convert_set_back_to_map( + same_set, n_in_dims, n_out_dims) + + # Don't need to intersect same_map with iname bounds 
(I think..?) + + # See whether the intersection of dep map and SAME is empty + intersect_dep_and_same = same_map & dep_map + intersect_not_empty = not bool(intersect_dep_and_same.is_empty()) + + if intersect_not_empty: + deps_filtered.append((dependee_id, depender_id)) + break # No need to check any more deps for this pair + + return deps_filtered From 2ed669d01f9d34c45710b087fe23027f872eabc0 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Fri, 23 Apr 2021 20:46:40 -0500 Subject: [PATCH 306/460] fix flake8 issue --- loopy/schedule/checker/dependency.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/loopy/schedule/checker/dependency.py b/loopy/schedule/checker/dependency.py index a0b26c109..37acc1664 100644 --- a/loopy/schedule/checker/dependency.py +++ b/loopy/schedule/checker/dependency.py @@ -79,7 +79,7 @@ def filter_deps_by_intersection_with_SAME(knl): deps_filtered = set() for stmt in knl.instructions: - if hasattr(stmt, 'dependencies') and stmt.dependencies: + if hasattr(stmt, "dependencies") and stmt.dependencies: depender_id = stmt.id for dependee_id, dep_maps in stmt.dependencies: # Continue if we already have this pair From 196e17e302333459aeb15ceda058663537fd5471 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Fri, 23 Apr 2021 20:47:31 -0500 Subject: [PATCH 307/460] (WIP) start working on cartoon dag creation --- loopy/schedule/__init__.py | 24 +++++++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) diff --git a/loopy/schedule/__init__.py b/loopy/schedule/__init__.py index c6a9ec3ac..7a2c9b80e 100644 --- a/loopy/schedule/__init__.py +++ b/loopy/schedule/__init__.py @@ -1984,13 +1984,35 @@ def generate_loop_schedules_inner(kernel, debug_args={}): loop_nest_with_map = find_loop_nest_with_map(kernel) loop_nest_around_map = find_loop_nest_around_map(kernel) + + # {{{ create dependency graph with edges from depender* to dependee* + # iff intersection (SAME_map & DEP_map) is not empty + + from loopy.schedule.checker.dependency 
import ( + filter_deps_by_intersection_with_SAME, + ) + from loopy.schedule.checker.utils import ( + create_graph_from_pairs, + ) + + # Get dep graph edges with edges from depender->dependee + dep_graph_pairs = filter_deps_by_intersection_with_SAME(kernel) + + # Create dep graph from edges + insn_depends_on_graph = create_graph_from_pairs(dep_graph_pairs) + # TODO create ^this func + + # }}} + sched_state = SchedulerState( kernel=kernel, loop_nest_around_map=loop_nest_around_map, loop_insn_dep_map=find_loop_insn_dep_map( kernel, loop_nest_with_map=loop_nest_with_map, - loop_nest_around_map=loop_nest_around_map), + loop_nest_around_map=loop_nest_around_map, + insn_depends_on_graph=insn_depends_on_graph), # TODO deal with this + insn_depends_on_graph=insn_depends_on_graph, # TODO deal with this breakable_inames=ilp_inames, ilp_inames=ilp_inames, vec_inames=vec_inames, From fd754fa89f2cf583cd51b3f9d0f05bb74145d605 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Sat, 24 Apr 2021 01:22:23 -0500 Subject: [PATCH 308/460] fix minor bugs --- loopy/schedule/checker/dependency.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/loopy/schedule/checker/dependency.py b/loopy/schedule/checker/dependency.py index 37acc1664..1aea2520d 100644 --- a/loopy/schedule/checker/dependency.py +++ b/loopy/schedule/checker/dependency.py @@ -81,7 +81,7 @@ def filter_deps_by_intersection_with_SAME(knl): for stmt in knl.instructions: if hasattr(stmt, "dependencies") and stmt.dependencies: depender_id = stmt.id - for dependee_id, dep_maps in stmt.dependencies: + for dependee_id, dep_maps in stmt.dependencies.items(): # Continue if we already have this pair if (dependee_id, depender_id) in deps_filtered: continue @@ -116,7 +116,7 @@ def filter_deps_by_intersection_with_SAME(knl): intersect_not_empty = not bool(intersect_dep_and_same.is_empty()) if intersect_not_empty: - deps_filtered.append((dependee_id, depender_id)) + deps_filtered.add((dependee_id, depender_id)) break # No 
need to check any more deps for this pair return deps_filtered From 239b6737acbc8a455e30db60326fc1c9b497950b Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Sat, 24 Apr 2021 01:22:54 -0500 Subject: [PATCH 309/460] add test_filtering_deps_by_same() --- test/test_linearization_checker.py | 79 ++++++++++++++++++++++++++++++ 1 file changed, 79 insertions(+) diff --git a/test/test_linearization_checker.py b/test/test_linearization_checker.py index 5daa9b890..de3a9f416 100644 --- a/test/test_linearization_checker.py +++ b/test/test_linearization_checker.py @@ -2251,6 +2251,85 @@ def test_map_domain_with_stencil_dependencies(): # }}} + +# {{{ Dependency handling during linearization + +# {{{ test_filtering_deps_by_same + +def test_filtering_deps_by_same(): + + # Make a kernel (just need something that can carry deps) + knl = lp.make_kernel( + "{[i,j,k,m] : 0 <= i,j,k,m < n}", + """ + a[i,j,k,m] = 1 {id=s1} + a[i,j,k,m] = 2 {id=s2} + a[i,j,k,m] = 3 {id=s3} + a[i,j,k,m] = 4 {id=s4} + a[i,j,k,m] = 5 {id=s5} + """) + knl = lp.add_and_infer_dtypes(knl, {"a": np.float32}) + knl = lp.tag_inames(knl, "m:l.0") + + # Make some deps + + def _dep_with_condition(cond): + return _isl_map_with_marked_dims( + "[n] -> {{" + "[{0}'=0, i', j', k', m'] -> [{0}=0, i, j, k, m] : " + "0 <= i,j,k,m,i',j',k',m' < n and {1}" + "}}".format(STATEMENT_VAR_NAME, cond)) + + dep_s2_on_s1_1 = _dep_with_condition("i' < i and j' <= j and k' = k and m' < m") + dep_s2_on_s1_2 = _dep_with_condition("i' <= i and j' <= j and k' = k and m' < m") + + dep_s2_on_s2_1 = _dep_with_condition("i' < i and j' <= j and k' = k and m' < m") + dep_s2_on_s2_2 = _dep_with_condition("i' <= i and j' <= j and k' = k and m' < m") + + dep_s3_on_s2_1 = _dep_with_condition("i' < i and j' < j and k' = k and m' < m") + dep_s3_on_s2_2 = _dep_with_condition("i' = i and j' = j and k' < k and m' < m") + + dep_s4_on_s3_1 = _dep_with_condition("i' <= i and j' <= j and k' = k") + dep_s4_on_s3_2 = _dep_with_condition("i' <= i") + + 
dep_s5_on_s4_1 = _dep_with_condition("i' < i") + + knl = lp.add_dependency_v2(knl, "s2", "s1", dep_s2_on_s1_1) + knl = lp.add_dependency_v2(knl, "s2", "s1", dep_s2_on_s1_2) + + knl = lp.add_dependency_v2(knl, "s2", "s2", dep_s2_on_s2_1) + knl = lp.add_dependency_v2(knl, "s2", "s2", dep_s2_on_s2_2) + + knl = lp.add_dependency_v2(knl, "s3", "s2", dep_s3_on_s2_1) + knl = lp.add_dependency_v2(knl, "s3", "s2", dep_s3_on_s2_2) + + knl = lp.add_dependency_v2(knl, "s4", "s3", dep_s4_on_s3_1) + knl = lp.add_dependency_v2(knl, "s4", "s3", dep_s4_on_s3_2) + + knl = lp.add_dependency_v2(knl, "s5", "s4", dep_s5_on_s4_1) + + # Filter deps by intersection with SAME + + from loopy.schedule.checker.dependency import ( + filter_deps_by_intersection_with_SAME, + ) + dep_edges_filtered = filter_deps_by_intersection_with_SAME(knl) + + # Make sure filtered edges are correct + + # (m is concurrent so shouldn't matter) + dep_edges_expected = set([ + ("s1", "s2"), + ("s2", "s2"), + ("s3", "s4"), + ]) + + assert dep_edges_filtered == dep_edges_expected + +# }}} + +# }}} + # }}} From d3c1adcd6ca3948d58afa40ab03e5f27c1ea0608 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Sat, 24 Apr 2021 01:53:09 -0500 Subject: [PATCH 310/460] reverse order of cartoon graph dict so it maps depender->dependee --- loopy/schedule/checker/dependency.py | 13 ++++++++++--- test/test_linearization_checker.py | 18 +++++++++++------- 2 files changed, 21 insertions(+), 10 deletions(-) diff --git a/loopy/schedule/checker/dependency.py b/loopy/schedule/checker/dependency.py index 1aea2520d..97b5321bd 100644 --- a/loopy/schedule/checker/dependency.py +++ b/loopy/schedule/checker/dependency.py @@ -77,14 +77,21 @@ def filter_deps_by_intersection_with_SAME(knl): ) _, non_conc_inames = partition_inames_by_concurrency(knl) - deps_filtered = set() + # NOTE: deps filtered will map depender->dependee + deps_filtered = {} for stmt in knl.instructions: + if hasattr(stmt, "dependencies") and stmt.dependencies: + depender_id = stmt.id 
+ for dependee_id, dep_maps in stmt.dependencies.items(): + # Continue if we already have this pair - if (dependee_id, depender_id) in deps_filtered: + if dependee_id in deps_filtered.keys() and ( + depender_id in deps_filtered[dependee_id]): continue + for dep_map in dep_maps: # Create isl map representing "SAME" dep for these two insns @@ -116,7 +123,7 @@ def filter_deps_by_intersection_with_SAME(knl): intersect_not_empty = not bool(intersect_dep_and_same.is_empty()) if intersect_not_empty: - deps_filtered.add((dependee_id, depender_id)) + deps_filtered.setdefault(depender_id, set()).add(dependee_id) break # No need to check any more deps for this pair return deps_filtered diff --git a/test/test_linearization_checker.py b/test/test_linearization_checker.py index de3a9f416..383278326 100644 --- a/test/test_linearization_checker.py +++ b/test/test_linearization_checker.py @@ -2294,6 +2294,8 @@ def _dep_with_condition(cond): dep_s5_on_s4_1 = _dep_with_condition("i' < i") + dep_s5_on_s2_1 = _dep_with_condition("i' = i") + knl = lp.add_dependency_v2(knl, "s2", "s1", dep_s2_on_s1_1) knl = lp.add_dependency_v2(knl, "s2", "s1", dep_s2_on_s1_2) @@ -2308,23 +2310,25 @@ def _dep_with_condition(cond): knl = lp.add_dependency_v2(knl, "s5", "s4", dep_s5_on_s4_1) + knl = lp.add_dependency_v2(knl, "s5", "s2", dep_s5_on_s2_1) + # Filter deps by intersection with SAME from loopy.schedule.checker.dependency import ( filter_deps_by_intersection_with_SAME, ) - dep_edges_filtered = filter_deps_by_intersection_with_SAME(knl) + filtered_depends_on_dict = filter_deps_by_intersection_with_SAME(knl) # Make sure filtered edges are correct # (m is concurrent so shouldn't matter) - dep_edges_expected = set([ - ("s1", "s2"), - ("s2", "s2"), - ("s3", "s4"), - ]) + depends_on_dict_expected = { + "s2": set(["s1", "s2"]), + "s4": set(["s3"]), + "s5": set(["s2"]), + } - assert dep_edges_filtered == dep_edges_expected + assert filtered_depends_on_dict == depends_on_dict_expected # }}} From 
57dc139f2cd461b4ad413a1cff33c84cc44ec720 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Sat, 24 Apr 2021 02:38:33 -0500 Subject: [PATCH 311/460] create new kernel option use_dependencies_v2 --- loopy/options.py | 1 + 1 file changed, 1 insertion(+) diff --git a/loopy/options.py b/loopy/options.py index 45eb3eb63..3742cb27b 100644 --- a/loopy/options.py +++ b/loopy/options.py @@ -231,6 +231,7 @@ def __init__( disable_global_barriers=kwargs.get("disable_global_barriers", False), check_dep_resolution=kwargs.get("check_dep_resolution", True), + use_dependencies_v2=kwargs.get("use_dependencies_v2", False), enforce_variable_access_ordered=kwargs.get( "enforce_variable_access_ordered", True), From 111419af655e4344c7560cab0671504a943fff1e Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Sat, 24 Apr 2021 02:39:11 -0500 Subject: [PATCH 312/460] use new dependencies to create cartoon dep graph for linearization --- loopy/schedule/__init__.py | 93 ++++++++++++++++++++++++++------------ 1 file changed, 65 insertions(+), 28 deletions(-) diff --git a/loopy/schedule/__init__.py b/loopy/schedule/__init__.py index 7a2c9b80e..dc3696923 100644 --- a/loopy/schedule/__init__.py +++ b/loopy/schedule/__init__.py @@ -252,57 +252,94 @@ def find_loop_nest_around_map(kernel): return result -def find_loop_insn_dep_map(kernel, loop_nest_with_map, loop_nest_around_map): +def find_loop_insn_dep_map( + kernel, loop_nest_with_map, loop_nest_around_map, + cartoon_depends_on_dict, use_dependencies_v2=False, + ): """Returns a dictionary mapping inames to other instruction ids that need to be scheduled before the iname should be eligible for scheduling. 
+ + :arg loop_nest_with_map: Dictionary mapping iname1 to a set containing + iname2 iff either iname1 nests around iname2 or iname2 nests around + iname1 + + :arg loop_nest_around_map: Dictionary mapping iname1 to a set containing + iname2 iff iname2 nests around iname1 + """ result = {} from loopy.kernel.data import ConcurrentTag, IlpBaseTag + # For each insn, examine its inames (`iname`) and its dependees' inames + # (`dep_iname`) to determine which instructions must be scheduled before + # entering the iname loop. + # Create result dict, which maps iname to instructions that must be + # scheduled prior to entering iname. + + # For each insn, loop over its non-concurrent inames (`iname`) for insn in kernel.instructions: for iname in kernel.insn_inames(insn): + # (Ignore concurrent inames) if kernel.iname_tags_of_type(iname, ConcurrentTag): continue + # Let iname_dep be the set of ids associated with result[iname] + # (if iname is not already in result, add iname as a key) iname_dep = result.setdefault(iname, set()) - for dep_insn_id in insn.depends_on: + # Loop over instructions on which insn depends (dep_insn) + # and determine whether dep_insn must be schedued before + # iname, in which case add its id to iname_dep (result[iname]) + if kernel.options.use_dependencies_v2: + dependee_ids = cartoon_depends_on_dict.get(insn.id, set()) + else: + dependee_ids = insn.depends_on + + for dep_insn_id in dependee_ids: if dep_insn_id in iname_dep: # already depending, nothing to check continue - dep_insn = kernel.id_to_insn[dep_insn_id] - dep_insn_inames = dep_insn.within_inames + dep_insn = kernel.id_to_insn[dep_insn_id] # Dependee + dep_insn_inames = dep_insn.within_inames # Dependee inames + # Check whether insn's iname is also in dependee inames if iname in dep_insn_inames: - # Nothing to be learned, dependency is in loop over iname + # Nothing to be learned, dependee is inside loop over iname # already. 
continue # To make sure dep_insn belongs outside of iname, we must prove - # that all inames that dep_insn will be executed in nest + # that all inames in which dep_insn will be executed nest # outside of the loop over *iname*. (i.e. nested around, or # before). + # Loop over each of the dependee's inames (dep_insn_iname) may_add_to_loop_dep_map = True for dep_insn_iname in dep_insn_inames: + + # If loop_nest_around_map says dep_insn_iname nests around + # iname, dep_insn_iname is guaranteed to nest outside of + # iname, we're safe, so continue if dep_insn_iname in loop_nest_around_map[iname]: - # dep_insn_iname is guaranteed to nest outside of iname - # -> safe. continue + # If dep_insn_iname is concurrent, continue + # (parallel tags don't really nest, so disregard them here) if kernel.iname_tags_of_type(dep_insn_iname, (ConcurrentTag, IlpBaseTag)): - # Parallel tags don't really nest, so we'll disregard - # them here. continue + # If loop_nest_with_map says dep_insn_iname does not nest + # inside or around iname, it must be nested separately; + # we're safe, so continue if dep_insn_iname not in loop_nest_with_map.get(iname, []): - # dep_insn_iname does not nest with iname, so its nest - # must occur outside. continue + # If none of the three cases above succeeds for any + # dep_insn_iname in dep_insn_inames, we cannot add dep_insn + # to iname's set of insns in result dict. may_add_to_loop_dep_map = False break @@ -317,6 +354,9 @@ def find_loop_insn_dep_map(kernel, loop_nest_with_map, loop_nest_around_map): dep_insn=dep_insn_id, insn=insn.id)) + # If at least one of the three cases above succeeds for every + # dep_insn_iname, we can add dep_insn to iname's set of insns + # in result dict. 
iname_dep.add(dep_insn_id) return result @@ -1985,22 +2025,18 @@ def generate_loop_schedules_inner(kernel, debug_args={}): loop_nest_with_map = find_loop_nest_with_map(kernel) loop_nest_around_map = find_loop_nest_around_map(kernel) - # {{{ create dependency graph with edges from depender* to dependee* - # iff intersection (SAME_map & DEP_map) is not empty + # {{{ create cartoon dependency graph with edge from depender* to + # dependee* iff intersection (SAME_map & DEP_map) is not empty - from loopy.schedule.checker.dependency import ( - filter_deps_by_intersection_with_SAME, - ) - from loopy.schedule.checker.utils import ( - create_graph_from_pairs, - ) + if kernel.options.use_dependencies_v2: + from loopy.schedule.checker.dependency import ( + filter_deps_by_intersection_with_SAME, + ) - # Get dep graph edges with edges from depender->dependee - dep_graph_pairs = filter_deps_by_intersection_with_SAME(kernel) - - # Create dep graph from edges - insn_depends_on_graph = create_graph_from_pairs(dep_graph_pairs) - # TODO create ^this func + # Get dep graph edges with edges FROM depender TO dependee + cartoon_depends_on_dict = filter_deps_by_intersection_with_SAME(kernel) + else: + cartoon_depends_on_dict = None # }}} @@ -2011,8 +2047,9 @@ def generate_loop_schedules_inner(kernel, debug_args={}): kernel, loop_nest_with_map=loop_nest_with_map, loop_nest_around_map=loop_nest_around_map, - insn_depends_on_graph=insn_depends_on_graph), # TODO deal with this - insn_depends_on_graph=insn_depends_on_graph, # TODO deal with this + cartoon_depends_on_dict=cartoon_depends_on_dict, + ), + #insn_depends_on_graph=insn_depends_on_graph, # TODO deal with this breakable_inames=ilp_inames, ilp_inames=ilp_inames, vec_inames=vec_inames, From 926ae65558c5b7543a5c8c9b66d2ed16b8f978bb Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Sat, 24 Apr 2021 03:52:22 -0500 Subject: [PATCH 313/460] test use of cartoon dep graph inside find_loop_insn_dep_map() with new test: 
test_find_loop_insn_dep_map_using_cartoon_dep_graph() --- test/test_linearization_checker.py | 125 ++++++++++++++++++++++++++--- 1 file changed, 116 insertions(+), 9 deletions(-) diff --git a/test/test_linearization_checker.py b/test/test_linearization_checker.py index 383278326..b3b1dc495 100644 --- a/test/test_linearization_checker.py +++ b/test/test_linearization_checker.py @@ -1099,11 +1099,11 @@ def test_sios_and_schedules_with_vec_and_barriers(): # Get a linearization proc_knl = preprocess_kernel(knl) lin_knl = get_one_linearized_kernel(proc_knl) - linearization_items = lin_knl.linearization + lin_items = lin_knl.linearization stmt_id_pairs = [("stmt_1", "stmt_2")] pworders = get_pairwise_statement_orderings( - lin_knl, linearization_items, stmt_id_pairs) + lin_knl, lin_items, stmt_id_pairs) # {{{ Relationship between stmt_1 and stmt_2 @@ -1321,12 +1321,12 @@ def test_sios_with_matmul(): # Get a linearization proc_knl = preprocess_kernel(knl) lin_knl = get_one_linearized_kernel(proc_knl) - linearization_items = lin_knl.linearization + lin_items = lin_knl.linearization # Get ALL statement id pairs from loopy.schedule import RunInstruction all_stmt_ids = [ - lin_item.insn_id for lin_item in linearization_items + lin_item.insn_id for lin_item in lin_items if isinstance(lin_item, RunInstruction)] from itertools import product stmt_id_pairs = [] @@ -1335,7 +1335,7 @@ def test_sios_with_matmul(): # Generate pairwise ordering info for every pair get_pairwise_statement_orderings( - lin_knl, linearization_items, stmt_id_pairs) + lin_knl, lin_items, stmt_id_pairs) # }}} @@ -2262,11 +2262,11 @@ def test_filtering_deps_by_same(): knl = lp.make_kernel( "{[i,j,k,m] : 0 <= i,j,k,m < n}", """ - a[i,j,k,m] = 1 {id=s1} - a[i,j,k,m] = 2 {id=s2} - a[i,j,k,m] = 3 {id=s3} - a[i,j,k,m] = 4 {id=s4} a[i,j,k,m] = 5 {id=s5} + a[i,j,k,m] = 4 {id=s4} + a[i,j,k,m] = 3 {id=s3} + a[i,j,k,m] = 2 {id=s2} + a[i,j,k,m] = 1 {id=s1} """) knl = lp.add_and_infer_dtypes(knl, {"a": np.float32}) knl = 
lp.tag_inames(knl, "m:l.0") @@ -2332,6 +2332,113 @@ def _dep_with_condition(cond): # }}} + +# {{{ test_find_loop_insn_dep_map_using_cartoon_dep_graph + +def test_find_loop_insn_dep_map_using_cartoon_dep_graph(): + # Test use of cartoon dep graph inside find_loop_insn_dep_map(), + # which is called during linearization, and should cause + # linearization process to order the x loops below sequentially + + # Make a kernel + knl = lp.make_kernel( + "{[i,j,k,m,x1,x2,x3,x4,x5] : 0 <= i,j,k,m,x1,x2,x3,x4,x5 < n}", + """ + for i,j,k,m + for x5 + <>t5 = 5 {id=s5} + end + for x3 + <>t3 = 3 {id=s3} + end + for x4 + <>t4 = 4 {id=s4} + end + for x1 + <>t1 = 1 {id=s1} + end + for x2 + <>t2 = 2 {id=s2} + end + end + """) + knl = lp.tag_inames(knl, "m:l.0") + + # Make some deps + + def _dep_with_condition(xloop_after, xloop_before, cond): + sid_after = 0 if xloop_before == xloop_after else 1 + return _isl_map_with_marked_dims( + "[n] -> {{" + "[{0}'=0, i', j', k', m', x{1}'] -> [{0}={3}, i, j, k, m, x{2}] : " + "0 <= i,j,k,m,x{2},i',j',k',m',x{1}' < n and {4}" + "}}".format( + STATEMENT_VAR_NAME, xloop_before, xloop_after, sid_after, cond)) + + # Should NOT create an edge: + dep_s2_on_s1_1 = _dep_with_condition(2, 1, "i'< i and j'<=j and k' =k and m'=m") + # Should create an edge: + dep_s2_on_s1_2 = _dep_with_condition(2, 1, "i'<=i and j'<=j and k' =k and m'=m") + # Should NOT create an edge: + dep_s2_on_s2_1 = _dep_with_condition(2, 2, "i'< i and j'<=j and k' =k and m'=m") + # Should NOT create an edge: + dep_s2_on_s2_2 = _dep_with_condition(2, 2, "i'<=i and j'<=j and k'< k and m'=m") + # Should create an edge: + dep_s3_on_s2_1 = _dep_with_condition(3, 2, "i'<=i and j'<=j and k' =k and m'=m") + # Should create an edge: + dep_s4_on_s3_1 = _dep_with_condition(4, 3, "i'<=i and j'<=j and k' =k and m'=m") + # Should create an edge: + dep_s5_on_s4_1 = _dep_with_condition(5, 4, "i' =i and j' =j and k' =k and m'=m") + + knl = lp.add_dependency_v2(knl, "s2", "s1", dep_s2_on_s1_1) + knl = 
lp.add_dependency_v2(knl, "s2", "s1", dep_s2_on_s1_2) + knl = lp.add_dependency_v2(knl, "s2", "s2", dep_s2_on_s2_1) + knl = lp.add_dependency_v2(knl, "s2", "s2", dep_s2_on_s2_2) + knl = lp.add_dependency_v2(knl, "s3", "s2", dep_s3_on_s2_1) + knl = lp.add_dependency_v2(knl, "s4", "s3", dep_s4_on_s3_1) + knl = lp.add_dependency_v2(knl, "s5", "s4", dep_s5_on_s4_1) + + # Test filteringn of deps by intersection with SAME + + from loopy.schedule.checker.dependency import ( + filter_deps_by_intersection_with_SAME, + ) + filtered_depends_on_dict = filter_deps_by_intersection_with_SAME(knl) + + # Make sure filtered edges are correct + + # (m is concurrent so shouldn't matter) + depends_on_dict_expected = { + "s2": set(["s1"]), + "s3": set(["s2"]), + "s4": set(["s3"]), + "s5": set(["s4"]), + } + + assert filtered_depends_on_dict == depends_on_dict_expected + + # Get a linearization + knl = lp.set_options(knl, use_dependencies_v2=True) + proc_knl = preprocess_kernel(knl) + lin_knl = get_one_linearized_kernel(proc_knl) + lin_items = lin_knl.linearization + + # Check stmt order + from loopy.schedule import RunInstruction + stmt_ids_ordered = [ + lin_item.insn_id for lin_item in lin_items + if isinstance(lin_item, RunInstruction)] + + stmt_ids_ordered_expected = ["s1", "s2", "s3", "s4", "s5"] + + assert stmt_ids_ordered == stmt_ids_ordered_expected + + # Check dep satisfaction + unsatisfied_deps = lp.find_unsatisfied_dependencies(proc_knl, lin_items) + assert not unsatisfied_deps + +# }}} + # }}} # }}} From c689a9d43238076d375e12f2a8c180ad9c685ad6 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Sat, 24 Apr 2021 23:00:10 -0500 Subject: [PATCH 314/460] enable usage of cartoon dependency graph for statement ordering decisions during linearization --- loopy/schedule/__init__.py | 34 ++++++++++++++++++++++++++-------- 1 file changed, 26 insertions(+), 8 deletions(-) diff --git a/loopy/schedule/__init__.py b/loopy/schedule/__init__.py index dc3696923..37f44b310 100644 --- 
a/loopy/schedule/__init__.py +++ b/loopy/schedule/__init__.py @@ -372,16 +372,24 @@ def group_insn_counts(kernel): return result -def gen_dependencies_except(kernel, insn_id, except_insn_ids): - insn = kernel.id_to_insn[insn_id] - for dep_id in insn.depends_on: +def gen_dependencies_except( + kernel, insn_id, except_insn_ids, cartoon_depends_on_dict): + + # Get dependee IDs + if kernel.options.use_dependencies_v2: + dependee_ids = cartoon_depends_on_dict.get(insn_id, set()) + else: + dependee_ids = kernel.id_to_insn[insn_id].depends_on + + for dep_id in dependee_ids: if dep_id in except_insn_ids: continue yield dep_id - yield from gen_dependencies_except(kernel, dep_id, except_insn_ids) + yield from gen_dependencies_except( + kernel, dep_id, except_insn_ids, cartoon_depends_on_dict) def get_priority_tiers(wanted, priorities): @@ -665,6 +673,8 @@ class SchedulerState(ImmutableRecord): order with instruction priorities as tie breaker. """ + # TODO document cartoon_depends_on_dict + @property def last_entered_loop(self): if self.active_inames: @@ -971,7 +981,13 @@ def insn_sort_key(insn_id): for insn_id in insn_ids_to_try: insn = kernel.id_to_insn[insn_id] - is_ready = insn.depends_on <= sched_state.scheduled_insn_ids + # make sure dependees have been scheduled + if kernel.options.use_dependencies_v2: + dependee_ids = sched_state.cartoon_depends_on_dict.get(insn.id, set()) + else: + dependee_ids = insn.depends_on + + is_ready = dependee_ids <= sched_state.scheduled_insn_ids if not is_ready: continue @@ -1150,8 +1166,10 @@ def insn_sort_key(insn_id): # check if there's a dependency of insn that needs to be # outside of last_entered_loop. 
- for subdep_id in gen_dependencies_except(kernel, insn_id, - sched_state.scheduled_insn_ids): + for subdep_id in gen_dependencies_except( + kernel, insn_id, + sched_state.scheduled_insn_ids, + sched_state.cartoon_depends_on_dict): want = (kernel.insn_inames(subdep_id) - sched_state.parallel_inames) if ( @@ -2049,7 +2067,7 @@ def generate_loop_schedules_inner(kernel, debug_args={}): loop_nest_around_map=loop_nest_around_map, cartoon_depends_on_dict=cartoon_depends_on_dict, ), - #insn_depends_on_graph=insn_depends_on_graph, # TODO deal with this + cartoon_depends_on_dict=cartoon_depends_on_dict, breakable_inames=ilp_inames, ilp_inames=ilp_inames, vec_inames=vec_inames, From ac5dc08e635506a0d4f1f181f1be977b6c0037cb Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Sat, 24 Apr 2021 23:02:50 -0500 Subject: [PATCH 315/460] rename cartoon_depends_on_dict->simplified_depends_on_graph --- loopy/schedule/__init__.py | 27 ++++++++++++++------------- 1 file changed, 14 insertions(+), 13 deletions(-) diff --git a/loopy/schedule/__init__.py b/loopy/schedule/__init__.py index 37f44b310..94176ff12 100644 --- a/loopy/schedule/__init__.py +++ b/loopy/schedule/__init__.py @@ -254,7 +254,7 @@ def find_loop_nest_around_map(kernel): def find_loop_insn_dep_map( kernel, loop_nest_with_map, loop_nest_around_map, - cartoon_depends_on_dict, use_dependencies_v2=False, + simplified_depends_on_graph, use_dependencies_v2=False, ): """Returns a dictionary mapping inames to other instruction ids that need to be scheduled before the iname should be eligible for scheduling. 
@@ -292,7 +292,7 @@ def find_loop_insn_dep_map( # and determine whether dep_insn must be schedued before # iname, in which case add its id to iname_dep (result[iname]) if kernel.options.use_dependencies_v2: - dependee_ids = cartoon_depends_on_dict.get(insn.id, set()) + dependee_ids = simplified_depends_on_graph.get(insn.id, set()) else: dependee_ids = insn.depends_on @@ -373,11 +373,11 @@ def group_insn_counts(kernel): def gen_dependencies_except( - kernel, insn_id, except_insn_ids, cartoon_depends_on_dict): + kernel, insn_id, except_insn_ids, simplified_depends_on_graph): # Get dependee IDs if kernel.options.use_dependencies_v2: - dependee_ids = cartoon_depends_on_dict.get(insn_id, set()) + dependee_ids = simplified_depends_on_graph.get(insn_id, set()) else: dependee_ids = kernel.id_to_insn[insn_id].depends_on @@ -389,7 +389,7 @@ def gen_dependencies_except( yield dep_id yield from gen_dependencies_except( - kernel, dep_id, except_insn_ids, cartoon_depends_on_dict) + kernel, dep_id, except_insn_ids, simplified_depends_on_graph) def get_priority_tiers(wanted, priorities): @@ -673,7 +673,7 @@ class SchedulerState(ImmutableRecord): order with instruction priorities as tie breaker. 
""" - # TODO document cartoon_depends_on_dict + # TODO document simplified_depends_on_graph @property def last_entered_loop(self): @@ -983,7 +983,8 @@ def insn_sort_key(insn_id): # make sure dependees have been scheduled if kernel.options.use_dependencies_v2: - dependee_ids = sched_state.cartoon_depends_on_dict.get(insn.id, set()) + dependee_ids = sched_state.simplified_depends_on_graph.get( + insn.id, set()) else: dependee_ids = insn.depends_on @@ -1169,7 +1170,7 @@ def insn_sort_key(insn_id): for subdep_id in gen_dependencies_except( kernel, insn_id, sched_state.scheduled_insn_ids, - sched_state.cartoon_depends_on_dict): + sched_state.simplified_depends_on_graph): want = (kernel.insn_inames(subdep_id) - sched_state.parallel_inames) if ( @@ -2043,7 +2044,7 @@ def generate_loop_schedules_inner(kernel, debug_args={}): loop_nest_with_map = find_loop_nest_with_map(kernel) loop_nest_around_map = find_loop_nest_around_map(kernel) - # {{{ create cartoon dependency graph with edge from depender* to + # {{{ create simplified dependency graph with edge from depender* to # dependee* iff intersection (SAME_map & DEP_map) is not empty if kernel.options.use_dependencies_v2: @@ -2052,9 +2053,9 @@ def generate_loop_schedules_inner(kernel, debug_args={}): ) # Get dep graph edges with edges FROM depender TO dependee - cartoon_depends_on_dict = filter_deps_by_intersection_with_SAME(kernel) + simplified_depends_on_graph = filter_deps_by_intersection_with_SAME(kernel) else: - cartoon_depends_on_dict = None + simplified_depends_on_graph = None # }}} @@ -2065,9 +2066,9 @@ def generate_loop_schedules_inner(kernel, debug_args={}): kernel, loop_nest_with_map=loop_nest_with_map, loop_nest_around_map=loop_nest_around_map, - cartoon_depends_on_dict=cartoon_depends_on_dict, + simplified_depends_on_graph=simplified_depends_on_graph, ), - cartoon_depends_on_dict=cartoon_depends_on_dict, + simplified_depends_on_graph=simplified_depends_on_graph, breakable_inames=ilp_inames, ilp_inames=ilp_inames, 
vec_inames=vec_inames, From 3d808f1a653aece8425a41762416b5382fdd5329 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Sat, 24 Apr 2021 23:03:52 -0500 Subject: [PATCH 316/460] test usage of cartoon dependency graph for statement ordering decisions during linearization --- test/test_linearization_checker.py | 118 ++++++++++++++++------------- 1 file changed, 64 insertions(+), 54 deletions(-) diff --git a/test/test_linearization_checker.py b/test/test_linearization_checker.py index b3b1dc495..10c38ddfd 100644 --- a/test/test_linearization_checker.py +++ b/test/test_linearization_checker.py @@ -138,6 +138,13 @@ def _check_orderings_for_stmt_pair( maps_to_compare = [(m1, m2) for m1, m2 in map_candidates if m1 is not None] _align_and_compare_maps(maps_to_compare) + +def _get_runinstruction_ids_from_linearization(lin_items): + from loopy.schedule import RunInstruction + return [ + lin_item.insn_id for lin_item in lin_items + if isinstance(lin_item, RunInstruction)] + # }}} @@ -1316,18 +1323,13 @@ def test_sios_with_matmul(): knl, "b", ["j_inner", "k_inner"], default_tag="l.auto") knl = lp.prioritize_loops(knl, "k_outer,k_inner") - proc_knl = preprocess_kernel(knl) - # Get a linearization proc_knl = preprocess_kernel(knl) lin_knl = get_one_linearized_kernel(proc_knl) lin_items = lin_knl.linearization # Get ALL statement id pairs - from loopy.schedule import RunInstruction - all_stmt_ids = [ - lin_item.insn_id for lin_item in lin_items - if isinstance(lin_item, RunInstruction)] + all_stmt_ids = _get_runinstruction_ids_from_linearization(lin_items) from itertools import product stmt_id_pairs = [] for idx, sid in enumerate(all_stmt_ids): @@ -2273,28 +2275,30 @@ def test_filtering_deps_by_same(): # Make some deps - def _dep_with_condition(cond): + def _dep_with_condition(stmt_before, stmt_after, cond): + sid_after = 0 if stmt_before == stmt_after else 1 return _isl_map_with_marked_dims( "[n] -> {{" - "[{0}'=0, i', j', k', m'] -> [{0}=0, i, j, k, m] : " - "0 <= i,j,k,m,i',j',k',m' 
< n and {1}" - "}}".format(STATEMENT_VAR_NAME, cond)) + "[{0}'=0, i', j', k', m'] -> [{0}={1}, i, j, k, m] : " + "0 <= i,j,k,m,i',j',k',m' < n and {2}" + "}}".format( + STATEMENT_VAR_NAME, sid_after, cond)) - dep_s2_on_s1_1 = _dep_with_condition("i' < i and j' <= j and k' = k and m' < m") - dep_s2_on_s1_2 = _dep_with_condition("i' <= i and j' <= j and k' = k and m' < m") + dep_s2_on_s1_1 = _dep_with_condition(2, 1, "i'< i and j'<=j and k'=k and m't5 = 5 {id=s5} - end - for x3 - <>t3 = 3 {id=s3} - end - for x4 - <>t4 = 4 {id=s4} - end - for x1 - <>t1 = 1 {id=s1} - end - for x2 - <>t2 = 2 {id=s2} - end + <>t5 = 5 {id=s5} + <>t3 = 3 {id=s3} + <>t4 = 4 {id=s4} + <>t1 = 1 {id=s1} + <>t2 = 2 {id=s2} end """) knl = lp.tag_inames(knl, "m:l.0") # Make some deps - def _dep_with_condition(xloop_after, xloop_before, cond): - sid_after = 0 if xloop_before == xloop_after else 1 + def _dep_with_condition(stmt_before, stmt_after, cond): + sid_after = 0 if stmt_before == stmt_after else 1 return _isl_map_with_marked_dims( "[n] -> {{" - "[{0}'=0, i', j', k', m', x{1}'] -> [{0}={3}, i, j, k, m, x{2}] : " - "0 <= i,j,k,m,x{2},i',j',k',m',x{1}' < n and {4}" + "[{0}'=0, i', j', k', m'] -> [{0}={1}, i, j, k, m] : " + "0 <= i,j,k,m,i',j',k',m' < n and {2}" "}}".format( - STATEMENT_VAR_NAME, xloop_before, xloop_after, sid_after, cond)) + STATEMENT_VAR_NAME, sid_after, cond)) # Should NOT create an edge: dep_s2_on_s1_1 = _dep_with_condition(2, 1, "i'< i and j'<=j and k' =k and m'=m") @@ -2417,26 +2412,41 @@ def _dep_with_condition(xloop_after, xloop_before, cond): assert filtered_depends_on_dict == depends_on_dict_expected - # Get a linearization - knl = lp.set_options(knl, use_dependencies_v2=True) + stmt_ids_ordered_desired = ["s1", "s2", "s3", "s4", "s5"] + + # {{{ Get a linearization WITHOUT using the simplified dep graph + + knl = lp.set_options(knl, use_dependencies_v2=False) proc_knl = preprocess_kernel(knl) lin_knl = get_one_linearized_kernel(proc_knl) lin_items = 
lin_knl.linearization # Check stmt order - from loopy.schedule import RunInstruction - stmt_ids_ordered = [ - lin_item.insn_id for lin_item in lin_items - if isinstance(lin_item, RunInstruction)] + stmt_ids_ordered = _get_runinstruction_ids_from_linearization(lin_items) + assert stmt_ids_ordered != stmt_ids_ordered_desired + + # Check dep satisfaction + unsatisfied_deps = lp.find_unsatisfied_dependencies(proc_knl, lin_items) + assert unsatisfied_deps - stmt_ids_ordered_expected = ["s1", "s2", "s3", "s4", "s5"] + # }}} - assert stmt_ids_ordered == stmt_ids_ordered_expected + # {{{ Get a linearization using the simplified dep graph + knl = lp.set_options(knl, use_dependencies_v2=True) + proc_knl = preprocess_kernel(knl) + lin_knl = get_one_linearized_kernel(proc_knl) + lin_items = lin_knl.linearization + + # Check stmt order + stmt_ids_ordered = _get_runinstruction_ids_from_linearization(lin_items) + assert stmt_ids_ordered == stmt_ids_ordered_desired # Check dep satisfaction unsatisfied_deps = lp.find_unsatisfied_dependencies(proc_knl, lin_items) assert not unsatisfied_deps + # }}} + # }}} # }}} From 39fe70508a67d35c2ac7410664fb36930dfc1bca Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Sat, 24 Apr 2021 23:19:11 -0500 Subject: [PATCH 317/460] reduce duplicated code in tests by adding _process_and_linearize(knl) function, which returns linearization items along with the preprocessed kernel and linearized kernel --- test/test_linearization_checker.py | 56 +++++++++++++----------------- 1 file changed, 24 insertions(+), 32 deletions(-) diff --git a/test/test_linearization_checker.py b/test/test_linearization_checker.py index 060e3326d..c4eb49fc2 100644 --- a/test/test_linearization_checker.py +++ b/test/test_linearization_checker.py @@ -138,6 +138,14 @@ def _check_orderings_for_stmt_pair( maps_to_compare = [(m1, m2) for m1, m2 in map_candidates if m1 is not None] _align_and_compare_maps(maps_to_compare) + +def _process_and_linearize(knl): + # Return linearization items 
along with the preprocessed kernel and + # linearized kernel + proc_knl = preprocess_kernel(knl) + lin_knl = get_one_linearized_kernel(proc_knl) + return lin_knl.linearization, proc_knl, lin_knl + # }}} @@ -182,9 +190,7 @@ def test_intra_thread_pairwise_schedule_creation(): knl = lp.prioritize_loops(knl, "i,j") # Get a linearization - proc_knl = preprocess_kernel(knl) - lin_knl = get_one_linearized_kernel(proc_knl) - linearization_items = lin_knl.linearization + lin_items, proc_knl, lin_knl = _process_and_linearize(knl) stmt_id_pairs = [ ("stmt_a", "stmt_b"), @@ -196,7 +202,7 @@ def test_intra_thread_pairwise_schedule_creation(): ] pworders = get_pairwise_statement_orderings( lin_knl, - linearization_items, + lin_items, stmt_id_pairs, ) @@ -406,16 +412,14 @@ def test_pairwise_schedule_creation_with_hw_par_tags(): knl = lp.tag_inames(knl, {"j": "l.1", "jj": "l.0", "i": "g.0"}) # Get a linearization - proc_knl = preprocess_kernel(knl) - lin_knl = get_one_linearized_kernel(proc_knl) - linearization_items = lin_knl.linearization + lin_items, proc_knl, lin_knl = _process_and_linearize(knl) stmt_id_pairs = [ ("stmt_a", "stmt_b"), ] pworders = get_pairwise_statement_orderings( lin_knl, - linearization_items, + lin_items, stmt_id_pairs, ) @@ -544,9 +548,7 @@ def test_intra_thread_statement_instance_ordering(): knl = lp.prioritize_loops(knl, "i,j") # Get a linearization - knl = preprocess_kernel(knl) - knl = get_one_linearized_kernel(knl) - linearization_items = knl.linearization + lin_items, proc_knl, lin_knl = _process_and_linearize(knl) # Get pairwise schedules stmt_id_pairs = [ @@ -558,8 +560,8 @@ def test_intra_thread_statement_instance_ordering(): ("stmt_c", "stmt_d"), ] pworders = get_pairwise_statement_orderings( - knl, - linearization_items, + proc_knl, + lin_items, stmt_id_pairs, ) @@ -688,9 +690,7 @@ def test_statement_instance_ordering_with_hw_par_tags(): knl = lp.tag_inames(knl, {"j": "l.1", "jj": "l.0", "i": "g.0"}) # Get a linearization - proc_knl = 
preprocess_kernel(knl) - lin_knl = get_one_linearized_kernel(proc_knl) - linearization_items = lin_knl.linearization + lin_items, proc_knl, lin_knl = _process_and_linearize(knl) # Get pairwise schedules stmt_id_pairs = [ @@ -698,7 +698,7 @@ def test_statement_instance_ordering_with_hw_par_tags(): ] pworders = get_pairwise_statement_orderings( lin_knl, - linearization_items, + lin_items, stmt_id_pairs, ) @@ -772,13 +772,11 @@ def test_sios_and_schedules_with_barriers(): knl = lp.tag_inames(knl, {"l0": "l.0", "l1": "l.1", "g0": "g.0"}) # Get a linearization - proc_knl = preprocess_kernel(knl) - lin_knl = get_one_linearized_kernel(proc_knl) - linearization_items = lin_knl.linearization + lin_items, proc_knl, lin_knl = _process_and_linearize(knl) stmt_id_pairs = [("stmt_j1", "stmt_2"), ("stmt_1", "stmt_i0")] pworders = get_pairwise_statement_orderings( - lin_knl, linearization_items, stmt_id_pairs) + lin_knl, lin_items, stmt_id_pairs) # {{{ Relationship between stmt_j1 and stmt_2 @@ -1097,13 +1095,11 @@ def test_sios_and_schedules_with_vec_and_barriers(): knl = lp.tag_inames(knl, {"i": "vec", "l0": "l.0"}) # Get a linearization - proc_knl = preprocess_kernel(knl) - lin_knl = get_one_linearized_kernel(proc_knl) - linearization_items = lin_knl.linearization + lin_items, proc_knl, lin_knl = _process_and_linearize(knl) stmt_id_pairs = [("stmt_1", "stmt_2")] pworders = get_pairwise_statement_orderings( - lin_knl, linearization_items, stmt_id_pairs) + lin_knl, lin_items, stmt_id_pairs) # {{{ Relationship between stmt_1 and stmt_2 @@ -1316,17 +1312,13 @@ def test_sios_with_matmul(): knl, "b", ["j_inner", "k_inner"], default_tag="l.auto") knl = lp.prioritize_loops(knl, "k_outer,k_inner") - proc_knl = preprocess_kernel(knl) - # Get a linearization - proc_knl = preprocess_kernel(knl) - lin_knl = get_one_linearized_kernel(proc_knl) - linearization_items = lin_knl.linearization + lin_items, proc_knl, lin_knl = _process_and_linearize(knl) # Get ALL statement id pairs from 
loopy.schedule import RunInstruction all_stmt_ids = [ - lin_item.insn_id for lin_item in linearization_items + lin_item.insn_id for lin_item in lin_items if isinstance(lin_item, RunInstruction)] from itertools import product stmt_id_pairs = [] @@ -1335,7 +1327,7 @@ def test_sios_with_matmul(): # Generate pairwise ordering info for every pair get_pairwise_statement_orderings( - lin_knl, linearization_items, stmt_id_pairs) + lin_knl, lin_items, stmt_id_pairs) # }}} From 1615991b9764efd6a4d0d39700aa952c595ab618 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Sat, 24 Apr 2021 23:27:02 -0500 Subject: [PATCH 318/460] reduce duplicated code in tests by using _process_and_linearize(knl) function (created in ancestor branch) --- test/test_linearization_checker.py | 20 +++++--------------- 1 file changed, 5 insertions(+), 15 deletions(-) diff --git a/test/test_linearization_checker.py b/test/test_linearization_checker.py index fd01cc7d6..c5f3692ec 100644 --- a/test/test_linearization_checker.py +++ b/test/test_linearization_checker.py @@ -1435,9 +1435,7 @@ def test_add_dependency_v2(): assert not stmt.dependencies # Now make sure deps are satisfied - proc_knl = preprocess_kernel(knl) - lin_knl = get_one_linearized_kernel(proc_knl) - lin_items = lin_knl.linearization + lin_items, proc_knl, lin_knl = _process_and_linearize(knl) unsatisfied_deps = lp.find_unsatisfied_dependencies( proc_knl, lin_items) @@ -1481,9 +1479,7 @@ def test_new_dependencies_finite_diff(): knl = lp.prioritize_loops(knl, "t,x") # Make sure deps are satisfied - proc_knl = preprocess_kernel(knl) - lin_knl = get_one_linearized_kernel(proc_knl) - lin_items = lin_knl.linearization + lin_items, proc_knl, lin_knl = _process_and_linearize(knl) unsatisfied_deps = lp.find_unsatisfied_dependencies( proc_knl, lin_items) @@ -1499,9 +1495,7 @@ def test_new_dependencies_finite_diff(): knl = lp.prioritize_loops(knl, "x,t") # Make sure unsatisfied deps are caught - proc_knl = preprocess_kernel(knl) - lin_knl = 
get_one_linearized_kernel(proc_knl) - lin_items = lin_knl.linearization + lin_items, proc_knl, lin_knl = _process_and_linearize(knl) unsatisfied_deps = lp.find_unsatisfied_dependencies( proc_knl, lin_items) @@ -1518,9 +1512,7 @@ def test_new_dependencies_finite_diff(): knl = lp.tag_inames(knl, "x:l.0") # Make sure unsatisfied deps are caught - proc_knl = preprocess_kernel(knl) - lin_knl = get_one_linearized_kernel(proc_knl) - lin_items = lin_knl.linearization + lin_items, proc_knl, lin_knl = _process_and_linearize(knl) # Without a barrier, deps not satisfied # Make sure there is no barrier, and that unsatisfied deps are caught @@ -1551,9 +1543,7 @@ def test_new_dependencies_finite_diff(): knl, {"u": np.float32, "dx": np.float32, "dt": np.float32}) # Make sure deps are satisfied - proc_knl = preprocess_kernel(knl) - lin_knl = get_one_linearized_kernel(proc_knl) - lin_items = lin_knl.linearization + lin_items, proc_knl, lin_knl = _process_and_linearize(knl) print(lp.generate_code_v2(lin_knl).device_code()) unsatisfied_deps = lp.find_unsatisfied_dependencies( From 1d12fc05038491e0de3dae63d6207512701108a6 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Sat, 24 Apr 2021 23:42:07 -0500 Subject: [PATCH 319/460] reduce duplicated code in tests by using _process_and_linearize(knl) function (created in ancestor branch) --- test/test_linearization_checker.py | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/test/test_linearization_checker.py b/test/test_linearization_checker.py index 43aa804f7..50eab91cc 100644 --- a/test/test_linearization_checker.py +++ b/test/test_linearization_checker.py @@ -2348,7 +2348,9 @@ def test_linearization_using_simplified_dep_graph(): """) knl = lp.tag_inames(knl, "m:l.0") - # Make some deps + stmt_ids_ordered_desired = ["s1", "s2", "s3", "s4", "s5"] + + # {{{ Add some deps def _dep_with_condition(stmt_before, stmt_after, cond): sid_after = 0 if stmt_before == stmt_after else 1 @@ -2382,7 +2384,9 @@ def 
_dep_with_condition(stmt_before, stmt_after, cond): knl = lp.add_dependency_v2(knl, "s4", "s3", dep_s4_on_s3_1) knl = lp.add_dependency_v2(knl, "s5", "s4", dep_s5_on_s4_1) - # Test filteringn of deps by intersection with SAME + # }}} + + # {{{ Test filteringn of deps by intersection with SAME from loopy.schedule.checker.dependency import ( filter_deps_by_intersection_with_SAME, @@ -2401,30 +2405,27 @@ def _dep_with_condition(stmt_before, stmt_after, cond): assert filtered_depends_on_dict == depends_on_dict_expected - stmt_ids_ordered_desired = ["s1", "s2", "s3", "s4", "s5"] + # }}} # {{{ Get a linearization WITHOUT using the simplified dep graph knl = lp.set_options(knl, use_dependencies_v2=False) - proc_knl = preprocess_kernel(knl) - lin_knl = get_one_linearized_kernel(proc_knl) - lin_items = lin_knl.linearization + lin_items, proc_knl, lin_knl = _process_and_linearize(knl) - # Check stmt order + # Check stmt order (should be wrong) stmt_ids_ordered = _get_runinstruction_ids_from_linearization(lin_items) assert stmt_ids_ordered != stmt_ids_ordered_desired - # Check dep satisfaction + # Check dep satisfaction (should not all be satisfied) unsatisfied_deps = lp.find_unsatisfied_dependencies(proc_knl, lin_items) assert unsatisfied_deps # }}} # {{{ Get a linearization using the simplified dep graph + knl = lp.set_options(knl, use_dependencies_v2=True) - proc_knl = preprocess_kernel(knl) - lin_knl = get_one_linearized_kernel(proc_knl) - lin_items = lin_knl.linearization + lin_items, proc_knl, lin_knl = _process_and_linearize(knl) # Check stmt order stmt_ids_ordered = _get_runinstruction_ids_from_linearization(lin_items) From ffd0610b2d4366086e3bdfc9ab0f00f6554300de Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Sun, 25 Apr 2021 00:20:03 -0500 Subject: [PATCH 320/460] in every remaining point during linearization where dependencies are used, check kernel option use_dependencies_v2 to determine whether to use the new simplified (cartoon) dep graph instead of 
insn.depends_on --- loopy/schedule/__init__.py | 41 +++++++++++++++++++++++++++++--------- 1 file changed, 32 insertions(+), 9 deletions(-) diff --git a/loopy/schedule/__init__.py b/loopy/schedule/__init__.py index 94176ff12..fc084fd68 100644 --- a/loopy/schedule/__init__.py +++ b/loopy/schedule/__init__.py @@ -254,8 +254,7 @@ def find_loop_nest_around_map(kernel): def find_loop_insn_dep_map( kernel, loop_nest_with_map, loop_nest_around_map, - simplified_depends_on_graph, use_dependencies_v2=False, - ): + simplified_depends_on_graph): """Returns a dictionary mapping inames to other instruction ids that need to be scheduled before the iname should be eligible for scheduling. @@ -266,6 +265,13 @@ def find_loop_insn_dep_map( :arg loop_nest_around_map: Dictionary mapping iname1 to a set containing iname2 iff iname2 nests around iname1 + :arg simplified_depends_on_graph: Dictionary mapping depender statement IDs + to sets of dependee statement IDs, as produced by + `loopy.schedule.checker.dependency.filter_deps_by_intersection_with_SAME`, + which will be used to acquire depndee statement ids if + `kernel.options.use_dependencies_v2` is 'True' (otherwise old + dependencies in insn.depends_on will be used). 
+ """ result = {} @@ -685,12 +691,20 @@ def last_entered_loop(self): # }}} -def get_insns_in_topologically_sorted_order(kernel): +def get_insns_in_topologically_sorted_order( + kernel, simplified_depends_on_graph): from pytools.graph import compute_topological_order rev_dep_map = {insn.id: set() for insn in kernel.instructions} for insn in kernel.instructions: - for dep in insn.depends_on: + + if kernel.options.use_dependencies_v2: + dependee_ids = simplified_depends_on_graph.get( + insn.id, set()) + else: + dependee_ids = insn.depends_on + + for dep in dependee_ids: rev_dep_map[dep].add(insn.id) # For breaking ties, we compare the features of an intruction @@ -724,7 +738,8 @@ def key(insn_id): # {{{ schedule_as_many_run_insns_as_possible -def schedule_as_many_run_insns_as_possible(sched_state, template_insn): +def schedule_as_many_run_insns_as_possible( + sched_state, template_insn, use_dependencies_v2): """ Returns an instance of :class:`loopy.schedule.SchedulerState`, by appending all reachable instructions that are similar to *template_insn*. 
We define @@ -792,7 +807,14 @@ def is_similar_to_template(insn): if is_similar_to_template(insn): # check reachability - if not (insn.depends_on & ignored_unscheduled_insn_ids): + + if use_dependencies_v2: + dependee_ids = sched_state.simplified_depends_on_graph.get( + insn.id, set()) + else: + dependee_ids = insn.depends_on + + if not (dependee_ids & ignored_unscheduled_insn_ids): if insn.id in sched_state.prescheduled_insn_ids: if next_preschedule_insn_id() == insn.id: preschedule.pop(0) @@ -1119,8 +1141,8 @@ def insn_sort_key(insn_id): insns_in_topologically_sorted_order=new_toposorted_insns, ) - new_sched_state = schedule_as_many_run_insns_as_possible(new_sched_state, - insn) + new_sched_state = schedule_as_many_run_insns_as_possible( + new_sched_state, insn, kernel.options.use_dependencies_v2) # Don't be eager about entering/leaving loops--if progress has been # made, revert to top of scheduler and see if more progress can be @@ -2098,7 +2120,8 @@ def generate_loop_schedules_inner(kernel, debug_args={}): active_group_counts={}, insns_in_topologically_sorted_order=( - get_insns_in_topologically_sorted_order(kernel)), + get_insns_in_topologically_sorted_order( + kernel, simplified_depends_on_graph)), ) schedule_gen_kwargs = {} From bc23d84419919227f58d01b21caf48a387402aed Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Sun, 25 Apr 2021 00:20:58 -0500 Subject: [PATCH 321/460] update comment --- test/test_linearization_checker.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/test/test_linearization_checker.py b/test/test_linearization_checker.py index 50eab91cc..8f83577e0 100644 --- a/test/test_linearization_checker.py +++ b/test/test_linearization_checker.py @@ -2329,8 +2329,7 @@ def _dep_with_condition(stmt_before, stmt_after, cond): # {{{ test_linearization_using_simplified_dep_graph def test_linearization_using_simplified_dep_graph(): - # Test use of simplified dep graph inside find_loop_insn_dep_map(), - # which is called during 
linearization. + # Test use of simplified dep graph during linearization. # The deps created below should yield a simplified dep graph that causes the # linearization process to order assignments below in numerical order From 915471ef62fe627d3fa82c597a54c8421a8b9740 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Sun, 25 Apr 2021 21:54:59 -0500 Subject: [PATCH 322/460] add constrain_loop_nesting to imported functions --- loopy/__init__.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/loopy/__init__.py b/loopy/__init__.py index 6672a7c12..ed11600f1 100644 --- a/loopy/__init__.py +++ b/loopy/__init__.py @@ -67,7 +67,8 @@ from loopy.version import VERSION, MOST_RECENT_LANGUAGE_VERSION from loopy.transform.iname import ( - set_loop_priority, prioritize_loops, untag_inames, + set_loop_priority, prioritize_loops, constrain_loop_nesting, + untag_inames, split_iname, chunk_iname, join_inames, tag_inames, duplicate_inames, rename_iname, remove_unused_inames, split_reduction_inward, split_reduction_outward, @@ -187,7 +188,8 @@ # {{{ transforms - "set_loop_priority", "prioritize_loops", "untag_inames", + "set_loop_priority", "prioritize_loops", "constrain_loop_nesting", + "untag_inames", "split_iname", "chunk_iname", "join_inames", "tag_inames", "duplicate_inames", "rename_iname", "remove_unused_inames", From 1e68d02a2420f74e49ab8f7221bd7829073e7813 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Sun, 25 Apr 2021 21:55:29 -0500 Subject: [PATCH 323/460] add use_loop_nest_constraints option --- loopy/options.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/loopy/options.py b/loopy/options.py index 3742cb27b..895e655f7 100644 --- a/loopy/options.py +++ b/loopy/options.py @@ -172,6 +172,8 @@ class Options(ImmutableRecord): If equal to ``"no_check"``, then no check is performed. 
""" + # TODO document use_loop_nest_constraints + _legacy_options_map = { "cl_build_options": ("build_options", None), "write_cl": ("write_code", None), @@ -232,6 +234,8 @@ def __init__( False), check_dep_resolution=kwargs.get("check_dep_resolution", True), use_dependencies_v2=kwargs.get("use_dependencies_v2", False), + use_loop_nest_constraints=kwargs.get( + "use_loop_nest_constraints", False), enforce_variable_access_ordered=kwargs.get( "enforce_variable_access_ordered", True), From 7d7fc0216d4d994f79a990ed8f01f7b6f6c7fe95 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Sun, 25 Apr 2021 21:56:08 -0500 Subject: [PATCH 324/460] add loop_nest_constraint attribute to kernel --- loopy/kernel/__init__.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/loopy/kernel/__init__.py b/loopy/kernel/__init__.py index 9b022936b..96c609af3 100644 --- a/loopy/kernel/__init__.py +++ b/loopy/kernel/__init__.py @@ -252,6 +252,7 @@ class LoopKernel(ImmutableRecordWithoutPickling, Taggable): .. automethod:: tagged .. 
automethod:: without_tags """ + # TODO document loop_nest_constraints attribute # {{{ constructor @@ -272,6 +273,7 @@ def __init__(self, domains, instructions, args=None, iname_slab_increments=None, loop_priority=frozenset(), + loop_nest_constraints=None, silenced_warnings=None, applied_iname_rewrites=None, @@ -417,6 +419,7 @@ def __init__(self, domains, instructions, args=None, assumptions=assumptions, iname_slab_increments=iname_slab_increments, loop_priority=loop_priority, + loop_nest_constraints=loop_nest_constraints, silenced_warnings=silenced_warnings, temporary_variables=temporary_variables, local_sizes=local_sizes, @@ -1550,6 +1553,7 @@ def __setstate__(self, state): "substitutions", "iname_slab_increments", "loop_priority", + "loop_nest_constraints", "silenced_warnings", "options", "state", From 7acf1aab980b262df2998265f4884b8d7fb4fffc Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Sun, 25 Apr 2021 22:00:22 -0500 Subject: [PATCH 325/460] copy in AND UPDATE functions for creating and checking new loop nest constraints from old branch (https://gitlab.tiker.net/jdsteve2/loopy/iname-sets-in-loop-priorities): UnexpandedInameSet, LoopNestConstraints, process_loop_nest_specification, constrain_loop_nesting, update_must_nest_graph, _expand_iname_sets_in_tuple, check_must_not_nest, check_must_not_nest_against_must_nest_graph --- loopy/transform/iname.py | 585 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 585 insertions(+) diff --git a/loopy/transform/iname.py b/loopy/transform/iname.py index 8758284ef..673754a38 100644 --- a/loopy/transform/iname.py +++ b/loopy/transform/iname.py @@ -28,6 +28,7 @@ RuleAwareIdentityMapper, RuleAwareSubstitutionMapper, SubstitutionRuleMappingContext) from loopy.diagnostic import LoopyError +from pytools import Record __doc__ = """ @@ -112,6 +113,590 @@ def prioritize_loops(kernel, loop_priority): # }}} +# {{{ loop nest constraints + +# {{{ classes to house loop nest constraints + +# {{{ UnexpandedInameSet + +class 
UnexpandedInameSet(Record): + def __init__(self, inames, complement=False): + Record.__init__( + self, + inames=inames, + complement=complement, + ) + + def contains(self, iname): + return (iname not in self.inames if self.complement + else iname in self.inames) + + def contains_all(self, iname_set): + return (not (iname_set & self.inames) if self.complement + else iname_set.issubset(self.inames)) + + def get_inames_represented(self, iname_universe=None): + """Return the set of inames represented by the UnexpandedInameSet + """ + if self.complement: + if not iname_universe: + raise ValueError( + "Cannot expand UnexpandedInameSet %s without " + "iname_universe." % (self)) + return iname_universe-self.inames + else: + return self.inames.copy() + + def __lt__(self, other): + # TODO is this function really necessary? If so, what should it return? + return self.__hash__() < other.__hash__() + + def __hash__(self): + return hash(repr(self)) + + def update_persistent_hash(self, key_hash, key_builder): + """Custom hash computation function for use with + :class:`pytools.persistent_dict.PersistentDict`. + """ + + key_builder.rec(key_hash, self.inames) + key_builder.rec(key_hash, self.complement) + + def __str__(self): + return "%s{%s}" % ("~" if self.complement else "", + ",".join(i for i in sorted(self.inames))) + +# }}} + + +# {{{ LoopNestConstraints + +class LoopNestConstraints(Record): + def __init__(self, must_nest=None, must_not_nest=None, + must_nest_graph=None): + Record.__init__( + self, + must_nest=must_nest, + must_not_nest=must_not_nest, + must_nest_graph=must_nest_graph, + ) + + def __hash__(self): + return hash(repr(self)) + + def update_persistent_hash(self, key_hash, key_builder): + """Custom hash computation function for use with + :class:`pytools.persistent_dict.PersistentDict`. 
+ """ + + key_builder.rec(key_hash, self.must_nest) + key_builder.rec(key_hash, self.must_not_nest) + key_builder.rec(key_hash, self.must_nest_graph) + + def __str__(self): + return "LoopNestConstraints(\n" \ + " must_nest = " + str(self.must_nest) + "\n" \ + " must_not_nest = " + str(self.must_not_nest) + "\n" \ + " must_nest_graph = " + str(self.must_nest_graph) + "\n" \ + ")" + +# }}} + +# }}} + + +# {{{ initial loop nest constraint creation + +# {{{ process_loop_nest_specification + +def process_loop_nest_specification( + nesting, + max_tuple_size=None, + complement_sets_allowed=True, + ): + # make sure user-supplied nesting conforms to rules + # convert string representations of nestings to tuple of UnexpandedInameSets + + import re + + def raise_loop_nest_input_error(msg): + valid_prio_rules = ( + 'Valid `must_nest` description formats: ' + '"iname, iname, ..." or (str, str, str, ...), ' + 'where str can be of form ' + '"iname" or "{iname, iname, ...}". No set complements allowed.\n' + 'Valid `must_not_nest` description tuples must have len <= 2: ' + '"iname, iname", "iname, ~iname", or ' + '(str, str), where str can be of form ' + '"iname", "~iname", "{iname, iname, ...}", or "~{iname, iname, ...}".' 
+ ) + raise ValueError( + "Invalid loop nest prioritization: %s\n" + "Loop nest prioritization formatting rules:\n%s" + % (msg, valid_prio_rules)) + + def _error_on_regex_match(match_str, target_str): + if re.findall(match_str, target_str): + raise_loop_nest_input_error( + "Unrecognized character(s) %s in nest string %s" + % (re.findall(match_str, target_str), target_str)) + + def _process_iname_set_str(iname_set_str): + # convert something like ~{i,j} or ~i or "i,j" to an UnexpandedInameSet + + # remove leading/trailing whitespace + iname_set_str_stripped = iname_set_str.strip() + + if iname_set_str_stripped[0] == "~": + # Make sure compelement is allowed + if not complement_sets_allowed: + raise_loop_nest_input_error( + "Complement (~) not allowed in this loop nest string %s. " + "If you have a use-case where allowing a currently " + "disallowed set complement would be helpful, and the " + "desired nesting constraint cannot easily be expressed " + "another way, " + "please contact the Loo.py maintainers." + % (iname_set_str)) + + # Make sure that braces are included if multiple inames present + if "," in iname_set_str and not ( + iname_set_str.startswith("~{") and + iname_set_str.endswith("}")): + raise_loop_nest_input_error( + "Complements of sets containing multiple inames must " + "enclose inames in braces: %s is not valid." 
+ % (iname_set_str)) + + complement = True + else: + complement = False + + # remove leading/trailing tilde, braces, and space + iname_set_str_stripped = iname_set_str_stripped.strip("~{} ") + + # should be no remaining special characters besides comma and space + _error_on_regex_match(r'([^,\w ])', iname_set_str_stripped) + + # split by commas or spaces to get inames + inames = re.findall(r'([\w]+)(?:[ |,]*|$)', iname_set_str_stripped) + + # make sure iname count matches what we expect from comma count + if len(inames) != iname_set_str_stripped.count(",") + 1: + raise_loop_nest_input_error( + "Found %d inames but expected %d in string %s." + % (len(inames), iname_set_str_stripped.count(",") + 1, + iname_set_str_stripped)) + + return UnexpandedInameSet( + set([s.strip() for s in iname_set_str_stripped.split(",")]), + complement=complement) + + if isinstance(nesting, str): + # Enforce that priorities involving iname sets be passed as tuple + # Iname sets defined negatively with a single iname are allowed here + + # Check for any special characters besides comma, space, and tilde. + # E.g., curly braces would indicate that an iname set was NOT + # passed as a tuple, which is not allowed. + _error_on_regex_match(r'([^,\w~ ])', nesting) + + # Split by comma and process each tier + nesting_as_tuple = tuple( + _process_iname_set_str(set_str) for set_str in nesting.split(",")) + else: + # nesting not passed as string; process each tier + nesting_as_tuple = tuple( + _process_iname_set_str(set_str) for set_str in nesting) + + # check max_inames_per_set + if max_tuple_size and len(nesting_as_tuple) > max_tuple_size: + raise_loop_nest_input_error( + "Loop nest prioritization tuple %s exceeds max tuple size %d." + % (nesting_as_tuple)) + + # make sure nesting has len > 1 + if len(nesting_as_tuple) <= 1: + raise_loop_nest_input_error( + "Loop nest prioritization tuple %s must have length > 1." 
+ % (nesting_as_tuple)) + + # return tuple of UnexpandedInameSets + return nesting_as_tuple + +# }}} + + +# {{{ constrain_loop_nesting + +def constrain_loop_nesting( + kernel, must_nest=None, must_not_nest=None): + # TODO docstring + # TODO what if someone passes single-iname prio? + # TODO enforce that must_nest be a single tuple not list of tuples + # (or update implementation to allow list of tuples) + + # check for existing constraints + if kernel.loop_nest_constraints: + if kernel.loop_nest_constraints.must_nest: + must_nest_constraints_old = kernel.loop_nest_constraints.must_nest + else: + must_nest_constraints_old = set() + if kernel.loop_nest_constraints.must_not_nest: + must_not_nest_constraints_old = \ + kernel.loop_nest_constraints.must_not_nest + else: + must_not_nest_constraints_old = set() + if kernel.loop_nest_constraints.must_nest_graph: + must_nest_graph_old = kernel.loop_nest_constraints.must_nest_graph + else: + must_nest_graph_old = {} + else: + must_nest_constraints_old = set() + must_not_nest_constraints_old = set() + must_nest_graph_old = {} + + # {{{ process must_nest + + # TODO remove (TEMPORARY HACK TO KEEP LEGACY CODE RUNNING) + # expand_must_priorities = set() + + if must_nest: + # {{{ parse must_nest, check for conflicts, combine with old constraints + + # {{{ Parse must_nest; no complements allowed + must_nest_tuple = process_loop_nest_specification( + must_nest, complement_sets_allowed=False) + # }}} + + # {{{ Error if someone prioritizes concurrent iname + from loopy.kernel.data import ConcurrentTag + for iname_set in must_nest_tuple: + for iname in iname_set.inames: + if kernel.iname_tags_of_type(iname, ConcurrentTag): + raise ValueError( + "iname %s tagged with ConcurrentTag, " + "cannot use iname in must-nest constraint %s." + % (iname, must_nest_tuple)) + # }}} + + # {{{ must_nest_graph_new <- update_must_nest_graph(...) 
+ + # (checks for cycles) + must_nest_graph_new = update_must_nest_graph( + must_nest_graph_old, must_nest_tuple, kernel.all_inames()) + + # }}} + + # {{{ make sure must_nest constraints don't violate must_not_nest + # this may not catch all problems (?) + check_must_not_nest_against_must_nest_graph( + must_not_nest_constraints_old, must_nest_graph_new) + # }}} + + # {{{ check for conflicts with inames tagged 'vec' (must be innermost) + from loopy.kernel.data import VectorizeTag + for iname in kernel.all_inames(): + if kernel.iname_tags_of_type(iname, VectorizeTag) and ( + must_nest_graph_new.get(iname, set())): + # Iname cannot be a leaf, error + raise ValueError( + "Iname %s tagged as 'vec', but loop priorities " + "%s require that iname %s nest outside of inames %s. " + "Vectorized inames must nest innermost; cannot " + "impose loop nest specification." + % (iname, must_nest, iname, + must_nest_graph_new.get(iname, set()))) + # }}} + + # TODO remove (TEMPORARY HACK TO KEEP LEGACY CODE RUNNING) + # expand_must_priorities = _expand_iname_sets_in_tuple( + # must_nest_tuple, kernel.all_inames()) + + # {{{ combine new must_nest constraints with old + must_nest_constraints_new = must_nest_constraints_old | set( + [must_nest_tuple, ]) + # }}} + + # }}} + else: + # {{{ no new must_nest constraints, keep the old ones + must_nest_constraints_new = must_nest_constraints_old + must_nest_graph_new = must_nest_graph_old + # }}} + + # }}} + + # {{{ process must_not_nest + + if must_not_nest: + # {{{ parse must_not_nest, check for conflicts, combine with old constraints + + # {{{ Parse must_not_nest; complements allowed; max_tuple_size=2 + must_not_nest_tuple = process_loop_nest_specification( + must_not_nest, max_tuple_size=2) + # }}} + + # {{{ make sure must_not_nest constraints don't violate must_nest + # (cycles are allowed in must_not_nest constraints) + import itertools + must_pairs = [] + for iname_before, inames_after in must_nest_graph_new.items(): + 
must_pairs.extend(list(itertools.product([iname_before], inames_after))) + + if not check_must_not_nest(must_pairs, must_not_nest_tuple): + raise ValueError( + "constrain_loop_nesting: nest constraint conflict detected. " + "must_not_nest constraints %s inconsistent with " + "must_nest constraints %s." + % (must_not_nest_tuple, must_nest_constraints_new)) + # }}} + + # {{{ combine new must_not_nest constraints with old + must_not_nest_constraints_new = must_not_nest_constraints_old | set([ + must_not_nest_tuple, ]) + # }}} + + # }}} + else: + # {{{ no new must_not_nest constraints, keep the old ones + must_not_nest_constraints_new = must_not_nest_constraints_old + # }}} + + # }}} + + nest_constraints = LoopNestConstraints( + must_nest=must_nest_constraints_new, + must_not_nest=must_not_nest_constraints_new, + must_nest_graph=must_nest_graph_new, + ) + + # TODO do something with old priorities??? + return kernel.copy( + # loop_priority=kernel.loop_priority.union(expand_must_priorities), + loop_nest_constraints=nest_constraints, + ) + +# }}} + + +# {{{ update_must_nest_graph + +def update_must_nest_graph(must_nest_graph, must_nest, all_inames): + # Note: there should not be any complements in the must_nest tuples + from copy import deepcopy + new_graph = deepcopy(must_nest_graph) + + # first, all inames must be a node in the graph: + for missing_iname in all_inames - new_graph.keys(): + new_graph[missing_iname] = set() + + # get (before, after) pairs: + must_nest_expanded = _expand_iname_sets_in_tuple(must_nest, all_inames) + + # update graph: + for before, after in must_nest_expanded: + new_graph[before].add(after) + + # compute transitive closure: + from pytools.graph import compute_transitive_closure + # Note: compute_transitive_closure now allows cycles, will not error + new_graph_closure = compute_transitive_closure(new_graph) + + # Check for inconsistent must_nest constraints by checking for cycle: + from pytools.graph import contains_cycle + if 
contains_cycle(new_graph_closure): + raise ValueError( + "update_must_nest_graph: Loop priority cycle detected. " + "must_nest constraints %s inconsistent with existing " + "must_nest constraints %s." + % (must_nest, must_nest_graph)) + return new_graph_closure + +# }}} + + +# {{{ _expand_iname_sets_in_tuple + +def _expand_iname_sets_in_tuple( + iname_sets_tuple, # (UnexpandedInameSet, Unex..., ...) + all_inames, + ): + + # First convert negatively defined iname sets to sets + positively_defined_iname_sets = [] + for iname_set in iname_sets_tuple: + positively_defined_iname_sets.append( + iname_set.get_inames_represented(all_inames)) + + # Now expand all priority tuples into (before, after) pairs using + # Cartesian product of all pairs of sets + # (Assumes prio_sets length > 1) + import itertools + loop_priority_pairs = set() + for i, before_set in enumerate(positively_defined_iname_sets[:-1]): + for after_set in positively_defined_iname_sets[i+1:]: + loop_priority_pairs.update( + list(itertools.product(before_set, after_set))) + + # Make sure no priority tuple contains an iname twice + for prio_tuple in loop_priority_pairs: + if len(set(prio_tuple)) != len(prio_tuple): + raise ValueError( + "Loop nesting %s contains cycle: %s. 
" + % (iname_sets_tuple, prio_tuple)) + return loop_priority_pairs + +# }}} + +# }}} + + +# {{{ checking constraints + +# {{{ check_must_nest (TODO copied in from old branch, not yet enabled) + +""" +def check_must_nest(all_loop_nests, must_nest, all_inames): + # in order to make sure must_nest is satisfied, we + # need to expand all must_nest tiers + + # TODO instead of expanding tiers into all pairs up front, + # create these pairs one at a time so that we can stop as soon as we fail + + must_nest_expanded = _expand_iname_sets_in_tuple(must_nest, all_inames) + # must_nest_expanded contains pairs + for before, after in must_nest_expanded: + found = False + for nesting in all_loop_nests: + if before in nesting and after in nesting and ( + nesting.index(before) < nesting.index(after)): + found = True + break + if not found: + return False + return True +""" + +# }}} + + +# {{{ check_must_not_nest + +def check_must_not_nest(all_loop_nests, must_not_nest): + # recall that must_not_nest may only contain two tiers + + for nesting in all_loop_nests: + # Go thru each pair in all_loop_nests + for i, iname_before in enumerate(nesting): + for iname_after in nesting[i+1:]: + # Check whether it violates must not nest + if (must_not_nest[0].contains(iname_before) + and must_not_nest[1].contains(iname_after)): + # Stop as soon as we fail + return False + return True + +# }}} + + +# {{{ check_all_must_not_nests (TODO copied in from old branch, not yet enabled) + +""" +def check_all_must_not_nests(all_loop_nests, must_not_nests): + # recall that must_not_nest may only contain two tiers + for must_not_nest in must_not_nests: + if not check_must_not_nest(all_loop_nests, must_not_nest): + return False + return True +""" + +# }}} + + +# {{{ is_loop_nesting_valid (TODO copied in from old branch, not yet enabled) + +""" +def is_loop_nesting_valid( + all_loop_nests, + must_nest_constraints, + must_not_nest_constraints, + all_inames): + + # check must-nest constraints + must_nest_valid = 
True + if must_nest_constraints: + for must_nest in must_nest_constraints: + if not check_must_nest( + all_loop_nests, must_nest, all_inames): + must_nest_valid = False + break + + # check must-not-nest constraints + must_not_nest_valid = True + if must_not_nest_constraints is not None: + for must_not_nest in must_not_nest_constraints: + if not check_must_not_nest( + all_loop_nests, must_not_nest): + must_not_nest_valid = False + break + + return must_nest_valid and must_not_nest_valid +""" + +# }}} + + +# {{{ check_must_not_nest_against_must_nest_graph + +def check_must_not_nest_against_must_nest_graph( + must_not_nest_constraints, must_nest_graph): + # make sure none of the must_nest constraints violate must_not_nest + # this may not catch all problems + + if must_not_nest_constraints and must_nest_graph: + import itertools + must_pairs = [] + for iname_before, inames_after in must_nest_graph.items(): + must_pairs.extend( + list(itertools.product([iname_before], inames_after))) + if any(not check_must_not_nest(must_pairs, must_not_nest_tuple) + for must_not_nest_tuple in must_not_nest_constraints): + raise ValueError( + "Nest constraint conflict detected. " + "must_not_nest constraints %s inconsistent with " + "must_nest relationships (must_nest graph: %s)." 
+ % (must_not_nest_constraints, must_nest_graph)) + +# }}} + + +# {{{ get_iname_nestings (TODO copied in from old branch, not yet enabled) + +def get_iname_nestings(outline): + from loopy.schedule import EnterLoop, LeaveLoop + # return a list of tuples representing deepest nestings + nestings = [] + current_tiers = [] + already_exiting_loops = False + for outline_item in outline: + if isinstance(outline_item, EnterLoop): + already_exiting_loops = False + current_tiers.append(outline_item.iname) + elif isinstance(outline_item, LeaveLoop): + if not already_exiting_loops: + nestings.append(tuple(current_tiers)) + already_exiting_loops = True + del current_tiers[-1] + return nestings + +# }}} + +# }}} + +# }}} + + # {{{ split/chunk inames # {{{ backend From 0f27f079f6ebb76ffa6b022e69faa44410941d7a Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Sun, 25 Apr 2021 22:09:00 -0500 Subject: [PATCH 326/460] fix flake8 issues --- loopy/transform/iname.py | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/loopy/transform/iname.py b/loopy/transform/iname.py index 673754a38..cdc4387f4 100644 --- a/loopy/transform/iname.py +++ b/loopy/transform/iname.py @@ -221,14 +221,15 @@ def process_loop_nest_specification( def raise_loop_nest_input_error(msg): valid_prio_rules = ( - 'Valid `must_nest` description formats: ' - '"iname, iname, ..." or (str, str, str, ...), ' - 'where str can be of form ' - '"iname" or "{iname, iname, ...}". No set complements allowed.\n' - 'Valid `must_not_nest` description tuples must have len <= 2: ' - '"iname, iname", "iname, ~iname", or ' - '(str, str), where str can be of form ' - '"iname", "~iname", "{iname, iname, ...}", or "~{iname, iname, ...}".' + "Valid `must_nest` description formats: " + "\"iname, iname, ...\" or (str, str, str, ...), " + "where str can be of form " + "\"iname\" or \"{iname, iname, ...}\". 
No set complements allowed.\n" + "Valid `must_not_nest` description tuples must have len <= 2: " + "\"iname, iname\", \"iname, ~iname\", or " + "(str, str), where str can be of form " + "\"iname\", \"~iname\", \"{iname, iname, ...}\", or " + "\"~{iname, iname, ...}\"." ) raise ValueError( "Invalid loop nest prioritization: %s\n" @@ -276,10 +277,10 @@ def _process_iname_set_str(iname_set_str): iname_set_str_stripped = iname_set_str_stripped.strip("~{} ") # should be no remaining special characters besides comma and space - _error_on_regex_match(r'([^,\w ])', iname_set_str_stripped) + _error_on_regex_match(r"([^,\w ])", iname_set_str_stripped) # split by commas or spaces to get inames - inames = re.findall(r'([\w]+)(?:[ |,]*|$)', iname_set_str_stripped) + inames = re.findall(r"([\w]+)(?:[ |,]*|$)", iname_set_str_stripped) # make sure iname count matches what we expect from comma count if len(inames) != iname_set_str_stripped.count(",") + 1: @@ -299,7 +300,7 @@ def _process_iname_set_str(iname_set_str): # Check for any special characters besides comma, space, and tilde. # E.g., curly braces would indicate that an iname set was NOT # passed as a tuple, which is not allowed. 
- _error_on_regex_match(r'([^,\w~ ])', nesting) + _error_on_regex_match(r"([^,\w~ ])", nesting) # Split by comma and process each tier nesting_as_tuple = tuple( From fcbe50062aad0eea2594401f964b870e5fc24e42 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Mon, 26 Apr 2021 02:37:41 -0500 Subject: [PATCH 327/460] fix a few more invalid loop nest constraints cases --- loopy/transform/iname.py | 42 ++++++++++++++++++++++++++++++++++------ 1 file changed, 36 insertions(+), 6 deletions(-) diff --git a/loopy/transform/iname.py b/loopy/transform/iname.py index cdc4387f4..01ef4f9a9 100644 --- a/loopy/transform/iname.py +++ b/loopy/transform/iname.py @@ -248,6 +248,11 @@ def _process_iname_set_str(iname_set_str): # remove leading/trailing whitespace iname_set_str_stripped = iname_set_str.strip() + if not iname_set_str_stripped: + raise_loop_nest_input_error( + "Found 0 inames in string %s." + % (iname_set_str)) + if iname_set_str_stripped[0] == "~": # Make sure compelement is allowed if not complement_sets_allowed: @@ -260,10 +265,17 @@ def _process_iname_set_str(iname_set_str): "please contact the Loo.py maintainers." % (iname_set_str)) + # remove tilde + iname_set_str_stripped = iname_set_str_stripped[1:] + if "~" in iname_set_str_stripped: + raise_loop_nest_input_error( + "Multiple complement symbols found in iname set string %s" + % (iname_set_str)) + # Make sure that braces are included if multiple inames present - if "," in iname_set_str and not ( - iname_set_str.startswith("~{") and - iname_set_str.endswith("}")): + if "," in iname_set_str_stripped and not ( + iname_set_str_stripped.startswith("{") and + iname_set_str_stripped.endswith("}")): raise_loop_nest_input_error( "Complements of sets containing multiple inames must " "enclose inames in braces: %s is not valid." 
@@ -273,8 +285,21 @@ def _process_iname_set_str(iname_set_str): else: complement = False - # remove leading/trailing tilde, braces, and space - iname_set_str_stripped = iname_set_str_stripped.strip("~{} ") + # remove leading/trailing spaces + iname_set_str_stripped = iname_set_str_stripped.strip(" ") + + # make sure braces are valid and strip them + if iname_set_str_stripped[0] == "{": + if not iname_set_str_stripped[-1] == "}": + raise_loop_nest_input_error( + "Invalid braces: %s" % (iname_set_str)) + else: + # remove enclosing braces + iname_set_str_stripped = iname_set_str_stripped[1:-1] + # if there are dangling braces around, they will be caught next + + # remove any more spaces + iname_set_str_stripped = iname_set_str_stripped.strip() # should be no remaining special characters besides comma and space _error_on_regex_match(r"([^,\w ])", iname_set_str_stripped) @@ -287,7 +312,12 @@ def _process_iname_set_str(iname_set_str): raise_loop_nest_input_error( "Found %d inames but expected %d in string %s." % (len(inames), iname_set_str_stripped.count(",") + 1, - iname_set_str_stripped)) + iname_set_str)) + + if len(inames) == 0: + raise_loop_nest_input_error( + "Found empty set in string %s." 
+ % (iname_set_str)) return UnexpandedInameSet( set([s.strip() for s in iname_set_str_stripped.split(",")]), From 75b3d804fb014f59a399bc48c8b03b3417f80f2c Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Mon, 26 Apr 2021 02:38:21 -0500 Subject: [PATCH 328/460] create test for loop nest semantics parsing (copied in from old branch (https://gitlab.tiker.net/jdsteve2/loopy/iname-sets-in-loop-priorities)) --- test/test_loop_nest_semantics.py | 198 +++++++++++++++++++++++++++++++ 1 file changed, 198 insertions(+) create mode 100644 test/test_loop_nest_semantics.py diff --git a/test/test_loop_nest_semantics.py b/test/test_loop_nest_semantics.py new file mode 100644 index 000000000..3b6381b44 --- /dev/null +++ b/test/test_loop_nest_semantics.py @@ -0,0 +1,198 @@ +__copyright__ = "Copyright (C) 2021 James Stevens" + +__license__ = """ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+""" + +import sys +import loopy as lp +import pyopencl as cl + +import logging +logger = logging.getLogger(__name__) + +try: + import faulthandler +except ImportError: + pass +else: + faulthandler.enable() + +from pyopencl.tools import pytest_generate_tests_for_pyopencl \ + as pytest_generate_tests + +__all__ = [ + "pytest_generate_tests", + "cl" # "cl.create_some_context" + ] + + +from loopy.version import LOOPY_USE_LANGUAGE_VERSION_2018_2 # noqa + + +def test_loop_constraint_strings_validity_check(): + ref_knl = lp.make_kernel( + "{ [g,h,i,j,k,xx]: 0<=g,h,i,j,k,xx 1: + exec(sys.argv[1]) + else: + from pytest import main + main([__file__]) + +# vim: foldmethod=marker From b2ec4bf0801abe2ce9f78721aa1b070ae2d70b22 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Mon, 26 Apr 2021 02:58:21 -0500 Subject: [PATCH 329/460] un-comment-out (and slightly improve) more old functions copied in from previous branch (https://gitlab.tiker.net/jdsteve2/loopy/iname-sets-in-loop-priorities): check_must_nest(), is_loop_nesting_valid(); rename function is_loop_nesting_valid->loop_nest_constraints_satisfied --- loopy/transform/iname.py | 20 ++++++-------------- 1 file changed, 6 insertions(+), 14 deletions(-) diff --git a/loopy/transform/iname.py b/loopy/transform/iname.py index 01ef4f9a9..3d7425b77 100644 --- a/loopy/transform/iname.py +++ b/loopy/transform/iname.py @@ -585,9 +585,8 @@ def _expand_iname_sets_in_tuple( # {{{ checking constraints -# {{{ check_must_nest (TODO copied in from old branch, not yet enabled) +# {{{ check_must_nest -""" def check_must_nest(all_loop_nests, must_nest, all_inames): # in order to make sure must_nest is satisfied, we # need to expand all must_nest tiers @@ -607,7 +606,6 @@ def check_must_nest(all_loop_nests, must_nest, all_inames): if not found: return False return True -""" # }}} @@ -645,35 +643,29 @@ def check_all_must_not_nests(all_loop_nests, must_not_nests): # }}} -# {{{ is_loop_nesting_valid (TODO copied in from old branch, not yet enabled) +# 
{{{ loop_nest_constraints_satisfied -""" -def is_loop_nesting_valid( +def loop_nest_constraints_satisfied( all_loop_nests, must_nest_constraints, must_not_nest_constraints, all_inames): # check must-nest constraints - must_nest_valid = True if must_nest_constraints: for must_nest in must_nest_constraints: if not check_must_nest( all_loop_nests, must_nest, all_inames): - must_nest_valid = False - break + return False # check must-not-nest constraints - must_not_nest_valid = True if must_not_nest_constraints is not None: for must_not_nest in must_not_nest_constraints: if not check_must_not_nest( all_loop_nests, must_not_nest): - must_not_nest_valid = False - break + return False - return must_nest_valid and must_not_nest_valid -""" + return True # }}} From 42578f6c5ef415da3885a99d4e424048e111d4f2 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Mon, 26 Apr 2021 02:59:06 -0500 Subject: [PATCH 330/460] copy in test for loop nest constraint satisfaction from old branch (https://gitlab.tiker.net/jdsteve2/loopy/iname-sets-in-loop-priorities) --- test/test_loop_nest_semantics.py | 94 +++++++++++++++++++++++++++++++- 1 file changed, 93 insertions(+), 1 deletion(-) diff --git a/test/test_loop_nest_semantics.py b/test/test_loop_nest_semantics.py index 3b6381b44..866bce249 100644 --- a/test/test_loop_nest_semantics.py +++ b/test/test_loop_nest_semantics.py @@ -46,7 +46,9 @@ from loopy.version import LOOPY_USE_LANGUAGE_VERSION_2018_2 # noqa -def test_loop_constraint_strings_validity_check(): +# {{{ test_loop_constraint_string_parsing + +def test_loop_constraint_string_parsing(): ref_knl = lp.make_kernel( "{ [g,h,i,j,k,xx]: 0<=g,h,i,j,k,xx 1: From ddfc0e52cc3c05f4e60847c8816802d29036c2cc Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Mon, 26 Apr 2021 03:04:10 -0500 Subject: [PATCH 331/460] test for adding (multiple) loop nest constraints to a kernel; copied in from old branch (https://gitlab.tiker.net/jdsteve2/loopy/iname-sets-in-loop-priorities) --- test/test_loop_nest_semantics.py 
| 55 ++++++++++++++++++++++++++++++++ 1 file changed, 55 insertions(+) diff --git a/test/test_loop_nest_semantics.py b/test/test_loop_nest_semantics.py index 866bce249..cc1b31a77 100644 --- a/test/test_loop_nest_semantics.py +++ b/test/test_loop_nest_semantics.py @@ -22,6 +22,7 @@ import sys import loopy as lp +import numpy as np import pyopencl as cl import logging @@ -280,6 +281,60 @@ def test_loop_nest_constraints_satisfied(): # }}} +# {{{ test_multiple_nest_constraints + +def test_multiple_nest_constraints(): + ref_knl = lp.make_kernel( + "{ [g,h,i,j,k,x,y,z]: 0<=g,h,i,j,k,x,y,z 1: exec(sys.argv[1]) From 8c51083e21063ef5c58095ccb0b7b1b56dc0ce82 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Mon, 26 Apr 2021 03:06:51 -0500 Subject: [PATCH 332/460] use term 'nest constraints' instead of 'loop priority' in cycle error string --- loopy/transform/iname.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/loopy/transform/iname.py b/loopy/transform/iname.py index 3d7425b77..e0e904c96 100644 --- a/loopy/transform/iname.py +++ b/loopy/transform/iname.py @@ -538,7 +538,7 @@ def update_must_nest_graph(must_nest_graph, must_nest, all_inames): from pytools.graph import contains_cycle if contains_cycle(new_graph_closure): raise ValueError( - "update_must_nest_graph: Loop priority cycle detected. " + "update_must_nest_graph: Nest constraint cycle detected. " "must_nest constraints %s inconsistent with existing " "must_nest constraints %s." 
% (must_nest, must_nest_graph)) From c3dec79a5a1260e86a39ab1149e07c2c9ab3c039 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Mon, 26 Apr 2021 03:07:29 -0500 Subject: [PATCH 333/460] test for catching incompatible loop nest constraints; copied in from old branch (https://gitlab.tiker.net/jdsteve2/loopy/iname-sets-in-loop-priorities) --- test/test_loop_nest_semantics.py | 46 ++++++++++++++++++++++++++++++-- 1 file changed, 44 insertions(+), 2 deletions(-) diff --git a/test/test_loop_nest_semantics.py b/test/test_loop_nest_semantics.py index cc1b31a77..3e5ac0df1 100644 --- a/test/test_loop_nest_semantics.py +++ b/test/test_loop_nest_semantics.py @@ -281,9 +281,9 @@ def test_loop_nest_constraints_satisfied(): # }}} -# {{{ test_multiple_nest_constraints +# {{{ test_adding_multiple_nest_constraints_to_knl -def test_multiple_nest_constraints(): +def test_adding_multiple_nest_constraints_to_knl(): ref_knl = lp.make_kernel( "{ [g,h,i,j,k,x,y,z]: 0<=g,h,i,j,k,x,y,z 1: exec(sys.argv[1]) From 308fba4293f5c313cfb92c021dca8cd50de71f45 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Mon, 26 Apr 2021 03:24:33 -0500 Subject: [PATCH 334/460] comment out currently unused function --- loopy/transform/iname.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/loopy/transform/iname.py b/loopy/transform/iname.py index e0e904c96..c634cee40 100644 --- a/loopy/transform/iname.py +++ b/loopy/transform/iname.py @@ -696,6 +696,7 @@ def check_must_not_nest_against_must_nest_graph( # {{{ get_iname_nestings (TODO copied in from old branch, not yet enabled) +""" def get_iname_nestings(outline): from loopy.schedule import EnterLoop, LeaveLoop # return a list of tuples representing deepest nestings @@ -712,6 +713,7 @@ def get_iname_nestings(outline): already_exiting_loops = True del current_tiers[-1] return nestings +""" # }}} From 7d5652aed4e392a8831e6183a885458a0bd84760 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Mon, 26 Apr 2021 03:25:21 -0500 Subject: [PATCH 335/460] rename 
last_entered_loop->deepest_active_iname to be more accurate and precise --- loopy/schedule/__init__.py | 31 ++++++++++++++++--------------- 1 file changed, 16 insertions(+), 15 deletions(-) diff --git a/loopy/schedule/__init__.py b/loopy/schedule/__init__.py index fc084fd68..962022e68 100644 --- a/loopy/schedule/__init__.py +++ b/loopy/schedule/__init__.py @@ -363,6 +363,7 @@ def find_loop_insn_dep_map( # If at least one of the three cases above succeeds for every # dep_insn_iname, we can add dep_insn to iname's set of insns # in result dict. + # (means dep_insn must be scheduled before entering iname loop) iname_dep.add(dep_insn_id) return result @@ -682,7 +683,7 @@ class SchedulerState(ImmutableRecord): # TODO document simplified_depends_on_graph @property - def last_entered_loop(self): + def deepest_active_iname(self): if self.active_inames: return self.active_inames[-1] else: @@ -1161,34 +1162,34 @@ def insn_sort_key(insn_id): # {{{ see if we're ready to leave the innermost loop - last_entered_loop = sched_state.last_entered_loop + deepest_active_iname = sched_state.deepest_active_iname - if last_entered_loop is not None: + if deepest_active_iname is not None: can_leave = True if ( - last_entered_loop in sched_state.prescheduled_inames + deepest_active_iname in sched_state.prescheduled_inames and not ( isinstance(next_preschedule_item, LeaveLoop) - and next_preschedule_item.iname == last_entered_loop)): + and next_preschedule_item.iname == deepest_active_iname)): # A prescheduled loop can only be left if the preschedule agrees. if debug_mode: print("cannot leave '%s' because of preschedule constraints" - % last_entered_loop) + % deepest_active_iname) can_leave = False - elif last_entered_loop not in sched_state.breakable_inames: + elif deepest_active_iname not in sched_state.breakable_inames: # If the iname is not breakable, then check that we've # scheduled all the instructions that require it. 
for insn_id in sched_state.unscheduled_insn_ids: insn = kernel.id_to_insn[insn_id] - if last_entered_loop in insn.within_inames: + if deepest_active_iname in insn.within_inames: if debug_mode: print("cannot leave '%s' because '%s' still depends on it" - % (last_entered_loop, format_insn(kernel, insn.id))) + % (deepest_active_iname, format_insn(kernel, insn.id))) # check if there's a dependency of insn that needs to be - # outside of last_entered_loop. + # outside of deepest_active_iname. for subdep_id in gen_dependencies_except( kernel, insn_id, sched_state.scheduled_insn_ids, @@ -1196,7 +1197,7 @@ def insn_sort_key(insn_id): want = (kernel.insn_inames(subdep_id) - sched_state.parallel_inames) if ( - last_entered_loop not in want): + deepest_active_iname not in want): print( "%(warn)swarning:%(reset_all)s '%(iname)s', " "which the schedule is " @@ -1210,7 +1211,7 @@ def insn_sort_key(insn_id): % { "warn": Fore.RED + Style.BRIGHT, "reset_all": Style.RESET_ALL, - "iname": last_entered_loop, + "iname": deepest_active_iname, "subdep": format_insn_id(kernel, subdep_id), "dep": format_insn_id(kernel, insn_id), "subdep_i": format_insn(kernel, subdep_id), @@ -1237,7 +1238,7 @@ def insn_sort_key(insn_id): if ignore_count: ignore_count -= 1 else: - assert sched_item.iname == last_entered_loop + assert sched_item.iname == deepest_active_iname if seen_an_insn: can_leave = True break @@ -1248,12 +1249,12 @@ def insn_sort_key(insn_id): sched_state.copy( schedule=( sched_state.schedule - + (LeaveLoop(iname=last_entered_loop),)), + + (LeaveLoop(iname=deepest_active_iname),)), active_inames=sched_state.active_inames[:-1], insn_ids_to_try=insn_ids_to_try, preschedule=( sched_state.preschedule - if last_entered_loop + if deepest_active_iname not in sched_state.prescheduled_inames else sched_state.preschedule[1:]), ), From 62ffb73a2bc3c790bee92f673dc7c9976b3db7a2 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Mon, 26 Apr 2021 03:37:00 -0500 Subject: [PATCH 336/460] rename uninformative 
'want' variable in various places, mostly want->nonconc_insn_inames_wanted --- loopy/schedule/__init__.py | 27 +++++++++++++++------------ 1 file changed, 15 insertions(+), 12 deletions(-) diff --git a/loopy/schedule/__init__.py b/loopy/schedule/__init__.py index 962022e68..e0a12effc 100644 --- a/loopy/schedule/__init__.py +++ b/loopy/schedule/__init__.py @@ -1016,19 +1016,21 @@ def insn_sort_key(insn_id): if not is_ready: continue - want = insn.within_inames - sched_state.parallel_inames + nonconc_insn_inames_wanted = insn.within_inames - sched_state.parallel_inames have = active_inames_set - sched_state.parallel_inames - if want != have: + if nonconc_insn_inames_wanted != have: is_ready = False if debug_mode: - if want-have: + if nonconc_insn_inames_wanted-have: print("instruction '%s' is missing inames '%s'" - % (format_insn(kernel, insn.id), ",".join(want-have))) - if have-want: + % (format_insn(kernel, insn.id), ",".join( + nonconc_insn_inames_wanted-have))) + if have-nonconc_insn_inames_wanted: print("instruction '%s' won't work under inames '%s'" - % (format_insn(kernel, insn.id), ",".join(have-want))) + % (format_insn(kernel, insn.id), ",".join( + have-nonconc_insn_inames_wanted))) # {{{ check if scheduling this insn is compatible with preschedule @@ -1082,7 +1084,7 @@ def insn_sort_key(insn_id): # {{{ determine reachability - if (not is_ready and have <= want): + if (not is_ready and have <= nonconc_insn_inames_wanted): reachable_insn_ids.add(insn_id) # }}} @@ -1194,10 +1196,11 @@ def insn_sort_key(insn_id): kernel, insn_id, sched_state.scheduled_insn_ids, sched_state.simplified_depends_on_graph): - want = (kernel.insn_inames(subdep_id) + nonconc_subdep_insn_inames_wanted = ( + kernel.insn_inames(subdep_id) - sched_state.parallel_inames) - if ( - deepest_active_iname not in want): + if (deepest_active_iname + not in nonconc_subdep_insn_inames_wanted): print( "%(warn)swarning:%(reset_all)s '%(iname)s', " "which the schedule is " @@ -1373,9 +1376,9 @@ def 
insn_sort_key(insn_id): for insn_id in reachable_insn_ids: insn = kernel.id_to_insn[insn_id] - want = insn.within_inames + wanted_insn_inames = insn.within_inames - if hypothetically_active_loops <= want: + if hypothetically_active_loops <= wanted_insn_inames: if usefulness is None: usefulness = insn.priority else: From c7018002bced59674bbee77d3c4e79f6cd399026 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Mon, 26 Apr 2021 03:39:06 -0500 Subject: [PATCH 337/460] rename have->nonconc_active_inames --- loopy/schedule/__init__.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/loopy/schedule/__init__.py b/loopy/schedule/__init__.py index e0a12effc..06de9e470 100644 --- a/loopy/schedule/__init__.py +++ b/loopy/schedule/__init__.py @@ -1017,20 +1017,20 @@ def insn_sort_key(insn_id): continue nonconc_insn_inames_wanted = insn.within_inames - sched_state.parallel_inames - have = active_inames_set - sched_state.parallel_inames + nonconc_active_inames = active_inames_set - sched_state.parallel_inames - if nonconc_insn_inames_wanted != have: + if nonconc_insn_inames_wanted != nonconc_active_inames: is_ready = False if debug_mode: - if nonconc_insn_inames_wanted-have: + if nonconc_insn_inames_wanted-nonconc_active_inames: print("instruction '%s' is missing inames '%s'" % (format_insn(kernel, insn.id), ",".join( - nonconc_insn_inames_wanted-have))) - if have-nonconc_insn_inames_wanted: + nonconc_insn_inames_wanted-nonconc_active_inames))) + if nonconc_active_inames-nonconc_insn_inames_wanted: print("instruction '%s' won't work under inames '%s'" % (format_insn(kernel, insn.id), ",".join( - have-nonconc_insn_inames_wanted))) + nonconc_active_inames-nonconc_insn_inames_wanted))) # {{{ check if scheduling this insn is compatible with preschedule @@ -1084,7 +1084,7 @@ def insn_sort_key(insn_id): # {{{ determine reachability - if (not is_ready and have <= nonconc_insn_inames_wanted): + if (not is_ready and nonconc_active_inames <= 
nonconc_insn_inames_wanted): reachable_insn_ids.add(insn_id) # }}} From 25bae77b27e54dffbce1ae5e85d1927950f54d00 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Mon, 26 Apr 2021 04:09:34 -0500 Subject: [PATCH 338/460] remove use_loop_nest_constraints option (just use them if they exist) --- loopy/options.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/loopy/options.py b/loopy/options.py index 895e655f7..3742cb27b 100644 --- a/loopy/options.py +++ b/loopy/options.py @@ -172,8 +172,6 @@ class Options(ImmutableRecord): If equal to ``"no_check"``, then no check is performed. """ - # TODO document use_loop_nest_constraints - _legacy_options_map = { "cl_build_options": ("build_options", None), "write_cl": ("write_code", None), @@ -234,8 +232,6 @@ def __init__( False), check_dep_resolution=kwargs.get("check_dep_resolution", True), use_dependencies_v2=kwargs.get("use_dependencies_v2", False), - use_loop_nest_constraints=kwargs.get( - "use_loop_nest_constraints", False), enforce_variable_access_ordered=kwargs.get( "enforce_variable_access_ordered", True), From 7f9fcdc97ffa5e58736965874435a8d68c0207a0 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Mon, 26 Apr 2021 04:10:57 -0500 Subject: [PATCH 339/460] add some informative comments --- loopy/schedule/__init__.py | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/loopy/schedule/__init__.py b/loopy/schedule/__init__.py index 06de9e470..33160b831 100644 --- a/loopy/schedule/__init__.py +++ b/loopy/schedule/__init__.py @@ -1020,6 +1020,7 @@ def insn_sort_key(insn_id): nonconc_active_inames = active_inames_set - sched_state.parallel_inames if nonconc_insn_inames_wanted != nonconc_active_inames: + # We don't have the inames we need, may need to open more loops is_ready = False if debug_mode: @@ -1082,7 +1083,8 @@ def insn_sort_key(insn_id): # }}} - # {{{ determine reachability + # {{{ determine reachability (no active inames conflict w/insn, but + # may need more inames) if (not is_ready and 
nonconc_active_inames <= nonconc_insn_inames_wanted): reachable_insn_ids.add(insn_id) @@ -1092,7 +1094,13 @@ def insn_sort_key(insn_id): if is_ready and debug_mode: print("ready to schedule '%s'" % format_insn(kernel, insn.id)) + # (if we wanted, we could check to see whether adding insn would + # violate dependencies_v2 here, as done in old in-progress branch: + # https://gitlab.tiker.net/jdsteve2/loopy/-/merge_requests/15/diffs) + if is_ready and not debug_mode: + # schedule this instruction and recurse + iid_set = frozenset([insn.id]) # {{{ update active group counts for added instruction @@ -1162,6 +1170,9 @@ def insn_sort_key(insn_id): # }}} + # No insns are ready to be scheduled now, but some may be reachable + # reachable_insn_ids = no active inames conflict w/insn, but may need more inames + # {{{ see if we're ready to leave the innermost loop deepest_active_iname = sched_state.deepest_active_iname @@ -1186,6 +1197,7 @@ def insn_sort_key(insn_id): for insn_id in sched_state.unscheduled_insn_ids: insn = kernel.id_to_insn[insn_id] if deepest_active_iname in insn.within_inames: + # cannot leave deepest_active_iname; insn still depends on it if debug_mode: print("cannot leave '%s' because '%s' still depends on it" % (deepest_active_iname, format_insn(kernel, insn.id))) From 86cdce775616ef8060a630298eb9ce39d277fd00 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Mon, 26 Apr 2021 04:11:40 -0500 Subject: [PATCH 340/460] when linearizing, don't leave a loop if doing so would violate must_nest constraints --- loopy/schedule/__init__.py | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/loopy/schedule/__init__.py b/loopy/schedule/__init__.py index 33160b831..02407a8f2 100644 --- a/loopy/schedule/__init__.py +++ b/loopy/schedule/__init__.py @@ -1258,6 +1258,40 @@ def insn_sort_key(insn_id): can_leave = True break + # {{{ don't leave if doing so would violate must_nest constraints + + # don't leave if must_nest constraints require that 
+ # additional inames be nested inside the current iname + if can_leave: + must_nest_graph = ( + sched_state.kernel.loop_nest_constraints.must_nest_graph + if sched_state.kernel.loop_nest_constraints else None) + + if must_nest_graph: + # get inames that must nest inside the current iname + must_nest_inside = must_nest_graph[deepest_active_iname] + + if must_nest_inside: + # get scheduled inames that are nested inside current iname + within_deepest_active_iname = False + actually_nested_inside = set() + for sched_item in sched_state.schedule: + if isinstance(sched_item, EnterLoop): + if within_deepest_active_iname: + actually_nested_inside.add(sched_item.iname) + elif sched_item.iname == deepest_active_iname: + within_deepest_active_iname = True + elif (isinstance(sched_item, LeaveLoop) and + sched_item.iname == deepest_active_iname): + break + + # don't leave if must_nest constraints require that + # additional inames be nested inside the current iname + if not must_nest_inside.issubset(actually_nested_inside): + can_leave = False + + # }}} + if can_leave and not debug_mode: for sub_sched in generate_loop_schedules_internal( From 6d8c8c7e062ed16b696bd511ad42a027d4d3b16a Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Mon, 26 Apr 2021 04:15:51 -0500 Subject: [PATCH 341/460] rename needed_inames->unscheduled_nonconc_insn_inames_needed --- loopy/schedule/__init__.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/loopy/schedule/__init__.py b/loopy/schedule/__init__.py index 02407a8f2..759125084 100644 --- a/loopy/schedule/__init__.py +++ b/loopy/schedule/__init__.py @@ -1317,11 +1317,11 @@ def insn_sort_key(insn_id): # {{{ see if any loop can be entered now # Find inames that are being referenced by as yet unscheduled instructions. 
- needed_inames = set() + unscheduled_nonconc_insn_inames_needed = set() for insn_id in sched_state.unscheduled_insn_ids: - needed_inames.update(kernel.insn_inames(insn_id)) + unscheduled_nonconc_insn_inames_needed.update(kernel.insn_inames(insn_id)) - needed_inames = (needed_inames + unscheduled_nonconc_insn_inames_needed = (unscheduled_nonconc_insn_inames_needed # There's no notion of 'entering' a parallel loop - sched_state.parallel_inames @@ -1330,7 +1330,8 @@ def insn_sort_key(insn_id): if debug_mode: print(75*"-") - print("inames still needed :", ",".join(needed_inames)) + print("inames still needed :", ",".join( + unscheduled_nonconc_insn_inames_needed)) print("active inames :", ",".join(sched_state.active_inames)) print("inames entered so far :", ",".join(sched_state.entered_inames)) print("reachable insns:", ",".join(reachable_insn_ids)) @@ -1339,10 +1340,10 @@ def insn_sort_key(insn_id): for grp, c in sched_state.active_group_counts.items())) print(75*"-") - if needed_inames: + if unscheduled_nonconc_insn_inames_needed: iname_to_usefulness = {} - for iname in needed_inames: + for iname in unscheduled_nonconc_insn_inames_needed: # {{{ check if scheduling this iname now is allowed/plausible From c21efddfa04f3c53e1d3be5eff9dd6b4105ca933 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Mon, 26 Apr 2021 04:32:23 -0500 Subject: [PATCH 342/460] more helpful comments --- loopy/schedule/__init__.py | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/loopy/schedule/__init__.py b/loopy/schedule/__init__.py index 759125084..e68572b7e 100644 --- a/loopy/schedule/__init__.py +++ b/loopy/schedule/__init__.py @@ -1346,6 +1346,9 @@ def insn_sort_key(insn_id): for iname in unscheduled_nonconc_insn_inames_needed: # {{{ check if scheduling this iname now is allowed/plausible + # based on preschedule constraints, loop_nest_around_map, + # loop_insn_dep_map, and data dependencies; + # if not, continue if ( iname in sched_state.prescheduled_inames @@ -1359,6 
+1362,9 @@ def insn_sort_key(insn_id): currently_accessible_inames = ( active_inames_set | sched_state.parallel_inames) + + # check loop_nest_around_map to determine whether inames that must + # nest around iname are available if ( not sched_state.loop_nest_around_map[iname] <= currently_accessible_inames): @@ -1366,6 +1372,9 @@ def insn_sort_key(insn_id): print("scheduling %s prohibited by loop nest-around map" % iname) continue + # loop_insn_dep_map: dict mapping inames to other insn ids that need to + # be scheduled before the iname should be eligible for scheduling. + # If loop dependency map prohibits scheduling of iname, continue if ( not sched_state.loop_insn_dep_map.get(iname, set()) <= sched_state.scheduled_insn_ids): @@ -1415,11 +1424,18 @@ def insn_sort_key(insn_id): # }}} + # so far, scheduling of iname is allowed/plausible + # {{{ determine if that gets us closer to being able to schedule an insn usefulness = None # highest insn priority enabled by iname + # suppose we were to activate this iname... + # would that get us closer to scheduling an insn? + hypothetically_active_loops = active_inames_set | {iname} + # loop over reachable_insn_ids (reachable insn: no active inames + # conflict w/insn, but may need more inames) for insn_id in reachable_insn_ids: insn = kernel.id_to_insn[insn_id] @@ -1432,6 +1448,7 @@ def insn_sort_key(insn_id): usefulness = max(usefulness, insn.priority) if usefulness is None: + # iname won't get us closer to scheduling insn if debug_mode: print("iname '%s' deemed not useful" % iname) continue @@ -1440,6 +1457,9 @@ def insn_sort_key(insn_id): # }}} + # keys of iname_to_usefulness are now inames that get us closer to + # scheduling an insn + # {{{ tier building # Build priority tiers. 
If a schedule is found in the first tier, then From c9c79524f9a3c887864e6466cab1860c0f8bd99d Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Mon, 26 Apr 2021 04:38:32 -0500 Subject: [PATCH 343/460] add some noqa --- loopy/transform/iname.py | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/loopy/transform/iname.py b/loopy/transform/iname.py index c634cee40..3482305b1 100644 --- a/loopy/transform/iname.py +++ b/loopy/transform/iname.py @@ -221,15 +221,16 @@ def process_loop_nest_specification( def raise_loop_nest_input_error(msg): valid_prio_rules = ( - "Valid `must_nest` description formats: " - "\"iname, iname, ...\" or (str, str, str, ...), " - "where str can be of form " - "\"iname\" or \"{iname, iname, ...}\". No set complements allowed.\n" - "Valid `must_not_nest` description tuples must have len <= 2: " - "\"iname, iname\", \"iname, ~iname\", or " - "(str, str), where str can be of form " - "\"iname\", \"~iname\", \"{iname, iname, ...}\", or " - "\"~{iname, iname, ...}\"." + "Valid `must_nest` description formats: " # noqa + "\"iname, iname, ...\" or (str, str, str, ...), " # noqa + "where str can be of form " # noqa + "\"iname\" or \"{iname, iname, ...}\". " # noqa + "No set complements allowed.\n" # noqa + "Valid `must_not_nest` description tuples must have len <= 2: " # noqa + "\"iname, iname\", \"iname, ~iname\", or " # noqa + "(str, str), where str can be of form " # noqa + "\"iname\", \"~iname\", \"{iname, iname, ...}\", or " # noqa + "\"~{iname, iname, ...}\"." 
# noqa ) raise ValueError( "Invalid loop nest prioritization: %s\n" From 624f03c6d39277a748400181b22984b773315ecc Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Mon, 26 Apr 2021 04:39:20 -0500 Subject: [PATCH 344/460] fix flake8 issues --- test/test_loop_nest_semantics.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/test_loop_nest_semantics.py b/test/test_loop_nest_semantics.py index 3e5ac0df1..50ea1e6d6 100644 --- a/test/test_loop_nest_semantics.py +++ b/test/test_loop_nest_semantics.py @@ -286,7 +286,7 @@ def test_loop_nest_constraints_satisfied(): def test_adding_multiple_nest_constraints_to_knl(): ref_knl = lp.make_kernel( "{ [g,h,i,j,k,x,y,z]: 0<=g,h,i,j,k,x,y,z Date: Mon, 26 Apr 2021 05:13:18 -0500 Subject: [PATCH 345/460] enable check_all_must_not_nests() --- loopy/transform/iname.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/loopy/transform/iname.py b/loopy/transform/iname.py index 3482305b1..7686379bf 100644 --- a/loopy/transform/iname.py +++ b/loopy/transform/iname.py @@ -630,16 +630,14 @@ def check_must_not_nest(all_loop_nests, must_not_nest): # }}} -# {{{ check_all_must_not_nests (TODO copied in from old branch, not yet enabled) +# {{{ check_all_must_not_nests -""" def check_all_must_not_nests(all_loop_nests, must_not_nests): # recall that must_not_nest may only contain two tiers for must_not_nest in must_not_nests: if not check_must_not_nest(all_loop_nests, must_not_nest): return False return True -""" # }}} From d6cb58406368a732f6c241e3e20e40615ee116c9 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Mon, 26 Apr 2021 05:14:15 -0500 Subject: [PATCH 346/460] during linearization, use must-nest and must-not-nest constraints to determine whether a loop can be entered (if kernel.loop_nest_constraints exists, otherwise fall back to old priorities) --- loopy/schedule/__init__.py | 212 ++++++++++++++++++++++++++++--------- 1 file changed, 160 insertions(+), 52 deletions(-) diff --git 
a/loopy/schedule/__init__.py b/loopy/schedule/__init__.py index e68572b7e..1fa67ded6 100644 --- a/loopy/schedule/__init__.py +++ b/loopy/schedule/__init__.py @@ -1460,67 +1460,127 @@ def insn_sort_key(insn_id): # keys of iname_to_usefulness are now inames that get us closer to # scheduling an insn - # {{{ tier building - - # Build priority tiers. If a schedule is found in the first tier, then - # loops in the second are not even tried (and so on). - loop_priority_set = set().union(*[set(prio) - for prio in - sched_state.kernel.loop_priority]) - useful_loops_set = set(iname_to_usefulness.keys()) - useful_and_desired = useful_loops_set & loop_priority_set - - if useful_and_desired: - wanted = ( - useful_and_desired - - sched_state.ilp_inames - - sched_state.vec_inames - ) - priority_tiers = [t for t in - get_priority_tiers(wanted, - sched_state.kernel.loop_priority - ) - ] - - # Update the loop priority set, because some constraints may have - # have been contradictary. - loop_priority_set = set().union(*[set(t) for t in priority_tiers]) - - priority_tiers.append( + if sched_state.kernel.loop_nest_constraints: + # {{{ use loop_nest_constraints in determining next_iname_candidates + + # inames not yet entered that would get us closer to scheduling an insn: + useful_loops_set = set(iname_to_usefulness.keys()) + + from loopy.transform.iname import ( + check_all_must_not_nests, + ) + from loopy.tools import ( + get_graph_sources, + ) + from pytools.graph import compute_induced_subgraph + + # since vec_inames must be innermost, + # they are not valid canidates unless only vec_inames remain + if useful_loops_set - sched_state.vec_inames: + useful_loops_set -= sched_state.vec_inames + + # to enter an iname without violating must_nest constraints, + # iname must be a source in the induced subgraph of must_nest_graph + # containing inames in useful_loops_set + must_nest_graph_full = ( + sched_state.kernel.loop_nest_constraints.must_nest_graph + if 
sched_state.kernel.loop_nest_constraints else None) + if must_nest_graph_full: + must_nest_graph_useful = compute_induced_subgraph( + must_nest_graph_full, useful_loops_set - - loop_priority_set - - sched_state.ilp_inames - - sched_state.vec_inames ) + source_inames = get_graph_sources(must_nest_graph_useful) + else: + source_inames = useful_loops_set + + # since graph has a key for every iname, + # sources should be the only valid iname candidates + + # check whether entering any source_inames violates + # must-not-nest constraints, given the currently active inames + must_not_nest_constraints = ( + sched_state.kernel.loop_nest_constraints.must_not_nest + if sched_state.kernel.loop_nest_constraints else None) + if must_not_nest_constraints: + next_iname_candidates = set() + for next_iname in source_inames: + iname_orders_to_check = [ + (active_iname, next_iname) + for active_iname in active_inames_set] + + if check_all_must_not_nests( + iname_orders_to_check, must_not_nest_constraints): + next_iname_candidates.add(next_iname) + else: + next_iname_candidates = source_inames + + # }}} else: - priority_tiers = [ - useful_loops_set + # {{{ old tier building + + # Build priority tiers. If a schedule is found in the first tier, then + # loops in the second are not even tried (and so on). 
+ loop_priority_set = set().union(*[set(prio) + for prio in + sched_state.kernel.loop_priority]) + useful_loops_set = set(iname_to_usefulness.keys()) + useful_and_desired = useful_loops_set & loop_priority_set + + if useful_and_desired: + wanted = ( + useful_and_desired - sched_state.ilp_inames - sched_state.vec_inames - ] - - # vectorization must be the absolute innermost loop - priority_tiers.extend([ - [iname] - for iname in sched_state.ilp_inames - if iname in useful_loops_set - ]) + ) + priority_tiers = [t for t in + get_priority_tiers(wanted, + sched_state.kernel.loop_priority + ) + ] + + # Update the loop priority set, because some constraints may have + # have been contradictary. + loop_priority_set = set().union(*[set(t) for t in priority_tiers]) + + priority_tiers.append( + useful_loops_set + - loop_priority_set + - sched_state.ilp_inames + - sched_state.vec_inames + ) + else: + priority_tiers = [ + useful_loops_set + - sched_state.ilp_inames + - sched_state.vec_inames + ] + + # vectorization must be the absolute innermost loop + priority_tiers.extend([ + [iname] + for iname in sched_state.ilp_inames + if iname in useful_loops_set + ]) + + priority_tiers.extend([ + [iname] + for iname in sched_state.vec_inames + if iname in useful_loops_set + ]) - priority_tiers.extend([ - [iname] - for iname in sched_state.vec_inames - if iname in useful_loops_set - ]) + # }}} - # }}} + if sched_state.kernel.loop_nest_constraints: + # {{{ loop over next_iname_candidates generated w/ loop_nest_constraints - if debug_mode: - print("useful inames: %s" % ",".join(useful_loops_set)) - else: - for tier in priority_tiers: + if debug_mode: + print("useful inames: %s" % ",".join(useful_loops_set)) + else: found_viable_schedule = False - for iname in sorted(tier, + # loop over iname candidates; enter inames and recurse: + for iname in sorted(next_iname_candidates, key=lambda iname: ( iname_to_usefulness.get(iname, 0), # Sort by iname to achieve deterministic @@ -1528,6 +1588,7 @@ 
def insn_sort_key(insn_id): iname), reverse=True): + # enter the loop and recurse for sub_sched in generate_loop_schedules_internal( sched_state.copy( schedule=( @@ -1541,16 +1602,63 @@ def insn_sort_key(insn_id): insn_ids_to_try=insn_ids_to_try, preschedule=( sched_state.preschedule - if iname not in sched_state.prescheduled_inames + if iname not in + sched_state.prescheduled_inames else sched_state.preschedule[1:]), ), debug=debug): + found_viable_schedule = True yield sub_sched + # TODO what happened if found_viable_schedule is false? if found_viable_schedule: return + # }}} + else: + # {{{ old looping over tiers + + if debug_mode: + print("useful inames: %s" % ",".join(useful_loops_set)) + else: + for tier in priority_tiers: + found_viable_schedule = False + + for iname in sorted(tier, + key=lambda iname: ( + iname_to_usefulness.get(iname, 0), + # Sort by iname to achieve deterministic + # ordering of generated schedules. + iname), + reverse=True): + + for sub_sched in generate_loop_schedules_internal( + sched_state.copy( + schedule=( + sched_state.schedule + + (EnterLoop(iname=iname),)), + active_inames=( + sched_state.active_inames + (iname,)), + entered_inames=( + sched_state.entered_inames + | frozenset((iname,))), + insn_ids_to_try=insn_ids_to_try, + preschedule=( + sched_state.preschedule + if iname not in + sched_state.prescheduled_inames + else sched_state.preschedule[1:]), + ), + debug=debug): + found_viable_schedule = True + yield sub_sched + + if found_viable_schedule: + return + + # }}} + # }}} if debug_mode: From 94b6f27f8a970ed1fbdf3916eb8532259fc74e59 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Mon, 26 Apr 2021 05:18:19 -0500 Subject: [PATCH 347/460] enable get_iname_nestings() --- loopy/transform/iname.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/loopy/transform/iname.py b/loopy/transform/iname.py index 7686379bf..687a04965 100644 --- a/loopy/transform/iname.py +++ b/loopy/transform/iname.py @@ -693,9 +693,8 @@ def 
check_must_not_nest_against_must_nest_graph( # }}} -# {{{ get_iname_nestings (TODO copied in from old branch, not yet enabled) +# {{{ get_iname_nestings -""" def get_iname_nestings(outline): from loopy.schedule import EnterLoop, LeaveLoop # return a list of tuples representing deepest nestings @@ -712,7 +711,6 @@ def get_iname_nestings(outline): already_exiting_loops = True del current_tiers[-1] return nestings -""" # }}} From c246bc9729d6c8229a9d95795786a51b9ce28365 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Mon, 26 Apr 2021 05:27:59 -0500 Subject: [PATCH 348/460] make sure ALL must_nest_constraints are satisfied before yielding a linearization --- loopy/schedule/__init__.py | 24 +++++++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) diff --git a/loopy/schedule/__init__.py b/loopy/schedule/__init__.py index 1fa67ded6..0910f6aac 100644 --- a/loopy/schedule/__init__.py +++ b/loopy/schedule/__init__.py @@ -1669,10 +1669,32 @@ def insn_sort_key(insn_id): if inp: raise ScheduleDebugInput(inp) + # {{{ make sure ALL must_nest_constraints are satisfied + + # (the check above avoids contradicting some must_nest constraints, + # but we don't know if all required nestings are present) + # TODO is this the only place we need to check all must_nest constraints? 
+ must_constraints_satisfied = True + if sched_state.kernel.loop_nest_constraints: + from loopy.transform.iname import ( + get_iname_nestings, + loop_nest_constraints_satisfied, + ) + must_nest_constraints = sched_state.kernel.loop_nest_constraints.must_nest + if must_nest_constraints: + sched_tiers = get_iname_nestings(sched_state.schedule) + must_constraints_satisfied = loop_nest_constraints_satisfied( + sched_tiers, must_nest_constraints, + must_not_nest_constraints=None, # (checked upon loop creation) + all_inames=kernel.all_inames()) + + # }}} + if ( not sched_state.active_inames and not sched_state.unscheduled_insn_ids - and not sched_state.preschedule): + and not sched_state.preschedule + and must_constraints_satisfied): # if done, yield result debug.log_success(sched_state.schedule) From 0bdc80e2a212ca7782e7ccff0e5e6a0f0c66e67c Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Mon, 26 Apr 2021 05:35:36 -0500 Subject: [PATCH 349/460] add get_graph_sources() to iname.py for now --- loopy/schedule/__init__.py | 2 -- loopy/transform/iname.py | 11 +++++++++++ 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/loopy/schedule/__init__.py b/loopy/schedule/__init__.py index 0910f6aac..d7068fae3 100644 --- a/loopy/schedule/__init__.py +++ b/loopy/schedule/__init__.py @@ -1468,8 +1468,6 @@ def insn_sort_key(insn_id): from loopy.transform.iname import ( check_all_must_not_nests, - ) - from loopy.tools import ( get_graph_sources, ) from pytools.graph import compute_induced_subgraph diff --git a/loopy/transform/iname.py b/loopy/transform/iname.py index 687a04965..ba0d9f7c2 100644 --- a/loopy/transform/iname.py +++ b/loopy/transform/iname.py @@ -714,6 +714,17 @@ def get_iname_nestings(outline): # }}} + +# {{{ get graph sources + +def get_graph_sources(graph): + sources = set(graph.keys()) + for non_sources in graph.values(): + sources -= non_sources + return sources + +# }}} + # }}} # }}} From ee59a67033cf5da16ac2aadd3e20d90b2dacd951 Mon Sep 17 00:00:00 2001 From: 
jdsteve2 Date: Mon, 26 Apr 2021 05:36:18 -0500 Subject: [PATCH 350/460] add test to ensure that vec inames are linearized innermost --- test/test_loop_nest_semantics.py | 96 +++++++++++++++++++++++++++++++- 1 file changed, 95 insertions(+), 1 deletion(-) diff --git a/test/test_loop_nest_semantics.py b/test/test_loop_nest_semantics.py index 50ea1e6d6..40749fe02 100644 --- a/test/test_loop_nest_semantics.py +++ b/test/test_loop_nest_semantics.py @@ -335,7 +335,7 @@ def test_adding_multiple_nest_constraints_to_knl(): # }}} -# {{{ +# {{{ test_incompatible_nest_constraints def test_incompatible_nest_constraints(): ref_knl = lp.make_kernel( @@ -377,6 +377,100 @@ def test_incompatible_nest_constraints(): # }}} +# {{{ test_vec_innermost: + +def test_vec_innermost(): + + def is_innermost(iname, linearization_items): + from loopy.schedule import (EnterLoop, LeaveLoop) + + # find EnterLoop(iname) in linearization + enter_iname_idx = None + for i, linearization_item in enumerate(linearization_items): + if isinstance(linearization_item, EnterLoop) and ( + linearization_item.iname == iname): + enter_iname_idx = i + break + else: + # iname not found + return False + + # now go through remaining linearization items after EnterLoop(iname) + for linearization_item in linearization_items[enter_iname_idx+1:]: + if isinstance(linearization_item, LeaveLoop): + # Break as soon as we find a LeaveLoop + # If this happens before we find an EnterLoop, iname is innermost + break + elif isinstance(linearization_item, EnterLoop): + # we found an EnterLoop inside iname + return False + + return True + + ref_knl = lp.make_kernel( + "{ [g,h,i,j,k]: 0<=g,h,i,j,k 1: exec(sys.argv[1]) From 83885f433767f1dc1954586860333cf8df538c20 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Mon, 26 Apr 2021 05:46:58 -0500 Subject: [PATCH 351/460] allow debug_args to be passed through from get_one_linearized_kernel() --- loopy/schedule/__init__.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff 
--git a/loopy/schedule/__init__.py b/loopy/schedule/__init__.py index d7068fae3..95b11712c 100644 --- a/loopy/schedule/__init__.py +++ b/loopy/schedule/__init__.py @@ -2418,7 +2418,7 @@ def print_longest_dead_end(): key_builder=LoopyKeyBuilder()) -def _get_one_scheduled_kernel_inner(kernel): +def _get_one_scheduled_kernel_inner(kernel, debug_args={}): # This helper function exists to ensure that the generator chain is fully # out of scope after the function returns. This allows it to be # garbage-collected in the exit handler of the @@ -2428,7 +2428,7 @@ def _get_one_scheduled_kernel_inner(kernel): # # See https://gitlab.tiker.net/inducer/sumpy/issues/31 for context. - return next(iter(generate_loop_schedules(kernel))) + return next(iter(generate_loop_schedules(kernel, debug_args=debug_args))) def get_one_scheduled_kernel(kernel): @@ -2440,7 +2440,7 @@ def get_one_scheduled_kernel(kernel): return get_one_linearized_kernel(kernel) -def get_one_linearized_kernel(kernel): +def get_one_linearized_kernel(kernel, debug_args={}): from loopy import CACHING_ENABLED sched_cache_key = kernel @@ -2458,7 +2458,7 @@ def get_one_linearized_kernel(kernel): if not from_cache: with ProcessLogger(logger, "%s: schedule" % kernel.name): with MinRecursionLimitForScheduling(kernel): - result = _get_one_scheduled_kernel_inner(kernel) + result = _get_one_scheduled_kernel_inner(kernel, debug_args) if CACHING_ENABLED and not from_cache: schedule_cache.store_if_not_present(sched_cache_key, result) From 03cd9f1184d0c7b22647b5f5ec7202c18cedf922 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Mon, 26 Apr 2021 05:49:08 -0500 Subject: [PATCH 352/460] add test for handling of loop nest constraints during linearization (copied in from old branch : https://gitlab.tiker.net/jdsteve2/loopy/iname-sets-in-loop-priorities) --- test/test_loop_nest_semantics.py | 162 +++++++++++++++++++++++++++++++ 1 file changed, 162 insertions(+) diff --git a/test/test_loop_nest_semantics.py 
b/test/test_loop_nest_semantics.py index 40749fe02..b60cad569 100644 --- a/test/test_loop_nest_semantics.py +++ b/test/test_loop_nest_semantics.py @@ -471,6 +471,168 @@ def is_innermost(iname, linearization_items): # }}} +# {{{ test_linearization_with_nesting_constraints + +def test_linearization_with_nesting_constraints(): + + def loop_order(linearization_items): + from loopy.schedule import EnterLoop + order = [] + for linearization_item in linearization_items: + if isinstance(linearization_item, EnterLoop): + order.append(linearization_item.iname) + return order + + ref_knl = lp.make_kernel( + "{ [g,h,i,j,k]: 0<=g,h,i,j,k 1: exec(sys.argv[1]) From a75247aecaf673ac4c98add4425321d0005ab280 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Mon, 26 Apr 2021 05:55:05 -0500 Subject: [PATCH 353/460] fix flake8 issues --- test/test_loop_nest_semantics.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/test/test_loop_nest_semantics.py b/test/test_loop_nest_semantics.py index b60cad569..bbfbfad93 100644 --- a/test/test_loop_nest_semantics.py +++ b/test/test_loop_nest_semantics.py @@ -340,7 +340,7 @@ def test_adding_multiple_nest_constraints_to_knl(): def test_incompatible_nest_constraints(): ref_knl = lp.make_kernel( "{ [g,h,i,j,k,x,y,z]: 0<=g,h,i,j,k,x,y,z Date: Tue, 27 Apr 2021 00:54:02 -0500 Subject: [PATCH 354/460] rename test file test_loop_nest_semantics.py->test_nest_constraints.py --- test/{test_loop_nest_semantics.py => test_nest_constraints.py} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename test/{test_loop_nest_semantics.py => test_nest_constraints.py} (100%) diff --git a/test/test_loop_nest_semantics.py b/test/test_nest_constraints.py similarity index 100% rename from test/test_loop_nest_semantics.py rename to test/test_nest_constraints.py From 764d9032ea3150578d9760573a0abafed2977994 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Tue, 27 Apr 2021 01:50:25 -0500 Subject: [PATCH 355/460] copy in and update old functions for 
replacing inames in nest constraints (from old branch : https://gitlab.tiker.net/jdsteve2/loopy/iname-sets-in-loop-priorities) --- loopy/transform/iname.py | 303 ++++++++++++++++++++++++++++++++++++++- 1 file changed, 301 insertions(+), 2 deletions(-) diff --git a/loopy/transform/iname.py b/loopy/transform/iname.py index ba0d9f7c2..d2f41d5fc 100644 --- a/loopy/transform/iname.py +++ b/loopy/transform/iname.py @@ -113,7 +113,7 @@ def prioritize_loops(kernel, loop_priority): # }}} -# {{{ loop nest constraints +# {{{ handle loop nest constraints # {{{ classes to house loop nest constraints @@ -727,6 +727,287 @@ def get_graph_sources(graph): # }}} + +# {{{ updating constraints during transformation + +# {{{ replace_inames_in_nest_constraints + +def replace_inames_in_nest_constraints( + inames_to_replace, replacement_inames, old_constraints, + coalesce_duplicate_replacement_inames=False): + """ + :arg inames_to_replace: A set of inames that may exist in + `old_constraints`, each of which is to be replaced with all inames + in `replacement_inames`. + + :arg replacement_inames: A set of inames, all of which will repalce each + iname in `inames_to_replace` in `old_constraints`. + + :arg old_constraints: An iterable of tuples containing one or more + :class:`UnexpandedInameSet` objects. 
+ """ + + # replace each iname in inames_to_replace + # with *all* inames in replacement_inames + + # loop through old_constraints and handle each nesting independently + new_constraints = set() + for old_nesting in old_constraints: + # loop through each iname_set in this nesting and perform replacement + new_nesting = [] + for iname_set in old_nesting: + + # find inames to be replaced + inames_found = inames_to_replace & iname_set.inames + + # create the new set of inames with the replacements + if inames_found: + new_inames = iname_set.inames - inames_found + new_inames.update(replacement_inames) + else: + new_inames = iname_set.inames.copy() + + new_nesting.append( + UnexpandedInameSet(new_inames, iname_set.complement)) + + # if we've removed things, new_nesting might only contain 1 item, + # in which case it's meaningless and we should just remove it + if len(new_nesting) > 1: + new_constraints.add(tuple(new_nesting)) + + # When joining inames, we may need to coalesce: + # e.g., if we join `i` and `j` into `ij`, and old_nesting was + # [{i, k}, {j, h}], at this point we have [{ij, k}, {ij, h}] + # which contains a cycle. If coalescing is enabled, change this + # to [{k}, ij, {h}] to remove the cycle. 
+ if coalesce_duplicate_replacement_inames: + + def coalesce_duplicate_inames_in_nesting(nesting, iname_candidates): + # TODO would like this to be fully generic, but for now, assumes + # all UnexpandedInameSets have complement=False, which works if + # we're only using this for must_nest constraints since they cannot + # have complements + for iname_set in nesting: + assert not iname_set.complement + + import copy + # copy and convert nesting to list so we can modify + coalesced_nesting = list(copy.deepcopy(nesting)) + + # repeat coalescing step until we don't find any adjacent pairs + # containing duplicates (among iname_candidates) + found_duplicates = True + while found_duplicates: + found_duplicates = False + # loop through each iname_set in nesting and coalesce + # (assume new_nesting has at least 2 items) + i = 0 + while i < len(coalesced_nesting)-1: + iname_set_before = coalesced_nesting[i] + iname_set_after = coalesced_nesting[i+1] + # coalesce for each iname candidate + for iname in iname_candidates: + if (iname_set_before.inames == set([iname, ]) and + iname_set_after.inames == set([iname, ])): + # before/after contain single iname to be coalesced, + # -> remove iname_set_after + del coalesced_nesting[i+1] + found_duplicates = True + elif (iname_set_before.inames == set([iname, ]) and + iname in iname_set_after.inames): + # before contains single iname to be coalesced, + # after contains iname along with others, + # -> remove iname from iname_set_after.inames + coalesced_nesting[i+1] = UnexpandedInameSet( + inames=iname_set_after.inames - set([iname, ]), + complement=iname_set_after.complement, + ) + found_duplicates = True + elif (iname in iname_set_before.inames and + iname_set_after.inames == set([iname, ])): + # after contains single iname to be coalesced, + # before contains iname along with others, + # -> remove iname from iname_set_before.inames + coalesced_nesting[i] = UnexpandedInameSet( + inames=iname_set_before.inames - set([iname, ]), + 
complement=iname_set_before.complement, + ) + found_duplicates = True + elif (iname in iname_set_before.inames and + iname in iname_set_after.inames): + # before and after contain iname along with others, + # -> remove iname from iname_set_{before,after}.inames + # and insert it in between them + coalesced_nesting[i] = UnexpandedInameSet( + inames=iname_set_before.inames - set([iname, ]), + complement=iname_set_before.complement, + ) + coalesced_nesting[i+1] = UnexpandedInameSet( + inames=iname_set_after.inames - set([iname, ]), + complement=iname_set_after.complement, + ) + coalesced_nesting.insert(i+1, UnexpandedInameSet( + inames=set([iname, ]), + complement=False, + )) + found_duplicates = True + # else, iname was not found in both sets, so do nothing + i = i + 1 + + return tuple(coalesced_nesting) + + # loop through new_constraints; handle each nesting independently + coalesced_constraints = set() + for new_nesting in new_constraints: + coalesced_constraints.add( + coalesce_duplicate_inames_in_nesting( + new_nesting, replacement_inames)) + + return coalesced_constraints + else: + return new_constraints + +# }}} + + +# {{{ replace_inames_in_graph + +def replace_inames_in_graph( + inames_to_replace, replacement_inames, old_graph): + # replace each iname in inames_to_replace with all inames in replacement_inames + + new_graph = {} + iname_to_replace_found_as_key = False + union_of_inames_after_for_replaced_keys = set() + for iname, inames_after in old_graph.items(): + # create new inames_after + new_inames_after = inames_after.copy() + inames_found = inames_to_replace & new_inames_after + + if inames_found: + new_inames_after -= inames_found + new_inames_after.update(replacement_inames) + + # update dict + if iname in inames_to_replace: + iname_to_replace_found_as_key = True + union_of_inames_after_for_replaced_keys = \ + union_of_inames_after_for_replaced_keys | new_inames_after + # don't add this iname as a key in new graph, + # its replacements will be added 
below + else: + new_graph[iname] = new_inames_after + + # add replacement iname keys + if iname_to_replace_found_as_key: + for new_key in replacement_inames: + new_graph[new_key] = union_of_inames_after_for_replaced_keys.copy() + + # check for cycle + from pytools.graph import contains_cycle + if contains_cycle(new_graph): + raise ValueError( + "replace_inames_in_graph: Loop priority cycle detected. " + "Cannot replace inames %s with inames %s." + % (inames_to_replace, replacement_inames)) + + return new_graph + +# }}} + + +# {{{ replace_inames_in_all_nest_constraints + +def replace_inames_in_all_nest_constraints( + kernel, old_inames, new_inames, + coalesce_duplicate_replacement_inames=False, + pairs_that_must_not_voilate_constraints=set(), + ): + # replace each iname in old_inames with all inames in new_inames + # TODO What was pairs_that_must_not_voilate_constraints used for??? + # TODO handle case where we want to keep old inames around + + # get old must_nest and must_not_nest + # (must_nest_graph will be rebuilt) + if kernel.loop_nest_constraints: + old_must_nest = kernel.loop_nest_constraints.must_nest + old_must_not_nest = kernel.loop_nest_constraints.must_not_nest + # (these could still be None) + else: + old_must_nest = None + old_must_not_nest = None + + if old_must_nest: + # check to make sure special pairs don't conflict with constraints + for iname_before, iname_after in pairs_that_must_not_voilate_constraints: + if iname_before in kernel.loop_nest_constraints.must_nest_graph[ + iname_after]: + raise ValueError( + "Implied nestings violate existing must-nest constraints." 
+ "\nimplied nestings: %s\nmust-nest constraints: %s" + % (pairs_that_must_not_voilate_constraints, old_must_nest)) + + new_must_nest = replace_inames_in_nest_constraints( + old_inames, new_inames, old_must_nest, + coalesce_duplicate_replacement_inames, + ) + else: + new_must_nest = None + + if old_must_not_nest: + # check to make sure special pairs don't conflict with constraints + if not check_all_must_not_nests( + pairs_that_must_not_voilate_constraints, old_must_not_nest): + raise ValueError( + "Implied nestings violate existing must-not-nest constraints." + "\nimplied nestings: %s\nmust-not-nest constraints: %s" + % (pairs_that_must_not_voilate_constraints, old_must_not_nest)) + + new_must_not_nest = replace_inames_in_nest_constraints( + old_inames, new_inames, old_must_not_nest, + coalesce_duplicate_replacement_inames=False, + # (for now, never coalesce must-not-nest constraints) + ) + # each must not nest constraint may only contain two tiers + # TODO coalesce_duplicate_replacement_inames? + else: + new_must_not_nest = None + + # Rebuild must_nest graph + if new_must_nest: + new_must_nest_graph = {} + new_all_inames = ( + kernel.all_inames() - set(old_inames)) | set(new_inames) + from pytools.graph import CycleError + for must_nest_tuple in new_must_nest: + try: + new_must_nest_graph = update_must_nest_graph( + new_must_nest_graph, must_nest_tuple, new_all_inames) + except CycleError: + raise ValueError( + "Loop priority cycle detected when replacing inames %s " + "with inames %s. 
Previous must_nest constraints: %s" + % (old_inames, new_inames, old_must_nest)) + + # make sure none of the must_nest constraints violate must_not_nest + # this may not catch all problems + check_must_not_nest_against_must_nest_graph( + new_must_not_nest, new_must_nest_graph) + else: + new_must_nest_graph = None + + return kernel.copy( + loop_nest_constraints=LoopNestConstraints( + must_nest=new_must_nest, + must_not_nest=new_must_not_nest, + must_nest_graph=new_must_nest_graph, + ) + ) + +# }}} + +# }}} + # }}} @@ -991,6 +1272,24 @@ def _split_iname_in_dependee(dep): new_prio = new_prio + (prio_iname,) new_priorities.append(new_prio) + # {{{ update nest constraints + + # TODO remove this + if within != parse_match(None): + raise NotImplementedError("within") + + # TODO due to 'within', should do this instead: + # Duplicate each constraint tuple containing split_iname, + # replace split_iname with (inner,outer) in the copy, while still keeping + # the original around. Then let remove_unused_inames handle removal of + # the old iname if necessary + + # update must_nest, must_not_nest, and must_nest_graph + kernel = replace_inames_in_all_nest_constraints( + kernel, set([iname_to_split, ]), [inner_iname, outer_iname]) + + # }}} + kernel = kernel.copy( domains=new_domains, iname_slab_increments=iname_slab_increments, @@ -1219,7 +1518,7 @@ def join_inames(kernel, inames, new_iname=None, tag=None, within=None): from loopy.match import parse_match within = parse_match(within) - # {{{ return the same kernel if no kernel matches + # {{{ return the same kernel if no insn matches if not any(within(kernel, insn) for insn in kernel.instructions): return kernel From c568e3b337b282d04962188ce0556e70a82f8e0f Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Tue, 27 Apr 2021 01:51:34 -0500 Subject: [PATCH 356/460] add test for nest constraint updating during split_iname --- test/test_nest_constraints.py | 83 +++++++++++++++++++++++++++++++++++ 1 file changed, 83 insertions(+) diff 
--git a/test/test_nest_constraints.py b/test/test_nest_constraints.py index bbfbfad93..edfdc50c3 100644 --- a/test/test_nest_constraints.py +++ b/test/test_nest_constraints.py @@ -633,6 +633,89 @@ def loop_order(linearization_items): # }}} +# {{{ test constraint updating during transformation + +# {{{ test_constraint_updating_split_iname + +def test_constraint_updating_split_iname(): + + def loop_order(linearization_items): + from loopy.schedule import EnterLoop + order = [] + for linearization_item in linearization_items: + if isinstance(linearization_item, EnterLoop): + order.append(linearization_item.iname) + return order + + ref_knl = lp.make_kernel( + "{ [g,h,i,j,k]: 0<=g,h,i,j,k 1: exec(sys.argv[1]) From d78ac5b6346812872b8e55082e624b7502081f95 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Tue, 27 Apr 2021 02:43:08 -0500 Subject: [PATCH 357/460] allow replaced inames to be kept around when replacing inames in constraints; correctly handle 'within' in split_iname by keeping original iname around and having remove_unused_inames remove it if necessary --- loopy/transform/iname.py | 55 +++++++++++++++++++++++++++------------- 1 file changed, 37 insertions(+), 18 deletions(-) diff --git a/loopy/transform/iname.py b/loopy/transform/iname.py index d2f41d5fc..4a38e73ac 100644 --- a/loopy/transform/iname.py +++ b/loopy/transform/iname.py @@ -734,7 +734,9 @@ def get_graph_sources(graph): def replace_inames_in_nest_constraints( inames_to_replace, replacement_inames, old_constraints, - coalesce_duplicate_replacement_inames=False): + coalesce_duplicate_replacement_inames=False, + keep_old_inames=False, + ): """ :arg inames_to_replace: A set of inames that may exist in `old_constraints`, each of which is to be replaced with all inames @@ -762,7 +764,11 @@ def replace_inames_in_nest_constraints( # create the new set of inames with the replacements if inames_found: - new_inames = iname_set.inames - inames_found + if keep_old_inames: + # TODO is copy necessary? 
+ new_inames = iname_set.inames.copy() + else: + new_inames = iname_set.inames - inames_found new_inames.update(replacement_inames) else: new_inames = iname_set.inames.copy() @@ -782,7 +788,7 @@ def replace_inames_in_nest_constraints( # to [{k}, ij, {h}] to remove the cycle. if coalesce_duplicate_replacement_inames: - def coalesce_duplicate_inames_in_nesting(nesting, iname_candidates): + def coalesce_duplicate_inames_in_nesting(nesting, coalesce_candidates): # TODO would like this to be fully generic, but for now, assumes # all UnexpandedInameSets have complement=False, which works if # we're only using this for must_nest constraints since they cannot @@ -795,7 +801,7 @@ def coalesce_duplicate_inames_in_nesting(nesting, iname_candidates): coalesced_nesting = list(copy.deepcopy(nesting)) # repeat coalescing step until we don't find any adjacent pairs - # containing duplicates (among iname_candidates) + # containing duplicates (among coalesce_candidates) found_duplicates = True while found_duplicates: found_duplicates = False @@ -806,7 +812,7 @@ def coalesce_duplicate_inames_in_nesting(nesting, iname_candidates): iname_set_before = coalesced_nesting[i] iname_set_after = coalesced_nesting[i+1] # coalesce for each iname candidate - for iname in iname_candidates: + for iname in coalesce_candidates: if (iname_set_before.inames == set([iname, ]) and iname_set_after.inames == set([iname, ])): # before/after contain single iname to be coalesced, @@ -922,6 +928,7 @@ def replace_inames_in_all_nest_constraints( kernel, old_inames, new_inames, coalesce_duplicate_replacement_inames=False, pairs_that_must_not_voilate_constraints=set(), + keep_old_inames=False, ): # replace each iname in old_inames with all inames in new_inames # TODO What was pairs_that_must_not_voilate_constraints used for??? 
@@ -949,7 +956,8 @@ def replace_inames_in_all_nest_constraints( new_must_nest = replace_inames_in_nest_constraints( old_inames, new_inames, old_must_nest, - coalesce_duplicate_replacement_inames, + coalesce_duplicate_replacement_inames=coalesce_duplicate_replacement_inames, + keep_old_inames=keep_old_inames, ) else: new_must_nest = None @@ -967,6 +975,7 @@ def replace_inames_in_all_nest_constraints( old_inames, new_inames, old_must_not_nest, coalesce_duplicate_replacement_inames=False, # (for now, never coalesce must-not-nest constraints) + keep_old_inames=keep_old_inames, ) # each must not nest constraint may only contain two tiers # TODO coalesce_duplicate_replacement_inames? @@ -976,8 +985,11 @@ def replace_inames_in_all_nest_constraints( # Rebuild must_nest graph if new_must_nest: new_must_nest_graph = {} - new_all_inames = ( - kernel.all_inames() - set(old_inames)) | set(new_inames) + if keep_old_inames: + new_all_inames = kernel.all_inames() | set(new_inames) + else: + new_all_inames = ( + kernel.all_inames() - set(old_inames)) | set(new_inames) from pytools.graph import CycleError for must_nest_tuple in new_must_nest: try: @@ -1274,19 +1286,15 @@ def _split_iname_in_dependee(dep): # {{{ update nest constraints - # TODO remove this - if within != parse_match(None): - raise NotImplementedError("within") - - # TODO due to 'within', should do this instead: - # Duplicate each constraint tuple containing split_iname, - # replace split_iname with (inner,outer) in the copy, while still keeping - # the original around. Then let remove_unused_inames handle removal of - # the old iname if necessary + # Add {inner,outer} wherever iname_to_split is found in constraints, while + # still keeping the original around. 
Then let remove_unused_inames handle + # removal of the old iname if necessary # update must_nest, must_not_nest, and must_nest_graph kernel = replace_inames_in_all_nest_constraints( - kernel, set([iname_to_split, ]), [inner_iname, outer_iname]) + kernel, set([iname_to_split, ]), [inner_iname, outer_iname], + keep_old_inames=True, + ) # }}} @@ -2275,6 +2283,17 @@ def _remove_iname_from_dep(dep): # }}} + # # {{{ Remove inames from loop nest constraints + + kernel = replace_inames_in_all_nest_constraints( + kernel, old_inames=unused_inames, new_inames=[], + coalesce_duplicate_replacement_inames=False, + pairs_that_must_not_voilate_constraints=set(), + keep_old_inames=False, + ) + + # }}} + return kernel From 1218f18eb74d54dd777aa79d23cc83deb237dd9c Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Tue, 27 Apr 2021 02:44:26 -0500 Subject: [PATCH 358/460] test constraint handling with 'within' in splilt_iname --- test/test_nest_constraints.py | 39 +++++++++++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) diff --git a/test/test_nest_constraints.py b/test/test_nest_constraints.py index edfdc50c3..f2d6d56d0 100644 --- a/test/test_nest_constraints.py +++ b/test/test_nest_constraints.py @@ -639,6 +639,8 @@ def loop_order(linearization_items): def test_constraint_updating_split_iname(): + from loopy.transform.iname import get_iname_nestings + def loop_order(linearization_items): from loopy.schedule import EnterLoop order = [] @@ -711,8 +713,45 @@ def loop_order(linearization_items): "g_outer", "g_inner", "h"] assert set(loop_order(knl_linearized.linearization)[4:]) == set(["j", "k"]) + # Testing split_iname with 'within' + + ref_knl = lp.make_kernel( + "{ [g,h,i,j,k]: 0<=g,h,i,j,k Date: Tue, 27 Apr 2021 02:56:53 -0500 Subject: [PATCH 359/460] clean up test_constraint_updating_split_iname with better helper function --- test/test_nest_constraints.py | 63 ++++++++++++++++------------------- 1 file changed, 28 insertions(+), 35 deletions(-) diff --git 
a/test/test_nest_constraints.py b/test/test_nest_constraints.py index f2d6d56d0..ccedff52f 100644 --- a/test/test_nest_constraints.py +++ b/test/test_nest_constraints.py @@ -641,32 +641,30 @@ def test_constraint_updating_split_iname(): from loopy.transform.iname import get_iname_nestings - def loop_order(linearization_items): - from loopy.schedule import EnterLoop - order = [] - for linearization_item in linearization_items: - if isinstance(linearization_item, EnterLoop): - order.append(linearization_item.iname) - return order + def linearize_and_get_nestings(unlinearized_knl): + knl_linearized = lp.get_one_linearized_kernel( + lp.preprocess_kernel(unlinearized_knl)) + return get_iname_nestings(knl_linearized.linearization) ref_knl = lp.make_kernel( "{ [g,h,i,j,k]: 0<=g,h,i,j,k Date: Tue, 27 Apr 2021 03:03:11 -0500 Subject: [PATCH 360/460] shorten variable names to clean up test --- test/test_nest_constraints.py | 106 +++++++++++++++++----------------- 1 file changed, 53 insertions(+), 53 deletions(-) diff --git a/test/test_nest_constraints.py b/test/test_nest_constraints.py index ccedff52f..1d7156843 100644 --- a/test/test_nest_constraints.py +++ b/test/test_nest_constraints.py @@ -381,14 +381,14 @@ def test_incompatible_nest_constraints(): def test_vec_innermost(): - def is_innermost(iname, linearization_items): + def is_innermost(iname, lin_items): from loopy.schedule import (EnterLoop, LeaveLoop) # find EnterLoop(iname) in linearization enter_iname_idx = None - for i, linearization_item in enumerate(linearization_items): - if isinstance(linearization_item, EnterLoop) and ( - linearization_item.iname == iname): + for i, lin_item in enumerate(lin_items): + if isinstance(lin_item, EnterLoop) and ( + lin_item.iname == iname): enter_iname_idx = i break else: @@ -396,12 +396,12 @@ def is_innermost(iname, linearization_items): return False # now go through remaining linearization items after EnterLoop(iname) - for linearization_item in 
linearization_items[enter_iname_idx+1:]: - if isinstance(linearization_item, LeaveLoop): + for lin_item in lin_items[enter_iname_idx+1:]: + if isinstance(lin_item, LeaveLoop): # Break as soon as we find a LeaveLoop # If this happens before we find an EnterLoop, iname is innermost break - elif isinstance(linearization_item, EnterLoop): + elif isinstance(lin_item, EnterLoop): # we found an EnterLoop inside iname return False @@ -418,25 +418,25 @@ def is_innermost(iname, linearization_items): knl = ref_knl knl = lp.tag_inames(knl, {"h": "vec"}) - knl_linearized = lp.get_one_linearized_kernel(lp.preprocess_kernel(knl)) - assert is_innermost("h", knl_linearized.linearization) + lin_knl = lp.get_one_linearized_kernel(lp.preprocess_kernel(knl)) + assert is_innermost("h", lin_knl.linearization) knl = ref_knl knl = lp.tag_inames(knl, {"h": "vec", "g": "l.1", "i": "l.0"}) - knl_linearized = lp.get_one_linearized_kernel(lp.preprocess_kernel(knl)) - assert is_innermost("h", knl_linearized.linearization) + lin_knl = lp.get_one_linearized_kernel(lp.preprocess_kernel(knl)) + assert is_innermost("h", lin_knl.linearization) knl = ref_knl knl = lp.tag_inames( knl, {"h": "vec", "g": "l.1", "i": "l.0", "k": "unr"}) - knl_linearized = lp.get_one_linearized_kernel(lp.preprocess_kernel(knl)) - assert is_innermost("h", knl_linearized.linearization) + lin_knl = lp.get_one_linearized_kernel(lp.preprocess_kernel(knl)) + assert is_innermost("h", lin_knl.linearization) knl = ref_knl knl = lp.tag_inames(knl, {"h": "vec"}) knl = lp.constrain_loop_nesting(knl, must_nest=("k", "i")) - knl_linearized = lp.get_one_linearized_kernel(lp.preprocess_kernel(knl)) - assert is_innermost("h", knl_linearized.linearization) + lin_knl = lp.get_one_linearized_kernel(lp.preprocess_kernel(knl)) + assert is_innermost("h", lin_knl.linearization) lp.set_caching_enabled(True) # try adding a must_nest constraint that conflicts with a vec tag @@ -475,12 +475,12 @@ def is_innermost(iname, linearization_items): def 
test_linearization_with_nesting_constraints(): - def loop_order(linearization_items): + def loop_order(lin_items): from loopy.schedule import EnterLoop order = [] - for linearization_item in linearization_items: - if isinstance(linearization_item, EnterLoop): - order.append(linearization_item.iname) + for lin_item in lin_items: + if isinstance(lin_item, EnterLoop): + order.append(lin_item.iname) return order ref_knl = lp.make_kernel( @@ -498,32 +498,32 @@ def loop_order(linearization_items): knl, must_nest=("i", "j", "h", "k", "g"), ) - knl_linearized = lp.get_one_linearized_kernel(lp.preprocess_kernel(knl)) - assert loop_order(knl_linearized.linearization) == ["i", "j", "h", "k", "g"] + lin_knl = lp.get_one_linearized_kernel(lp.preprocess_kernel(knl)) + assert loop_order(lin_knl.linearization) == ["i", "j", "h", "k", "g"] knl = ref_knl knl = lp.constrain_loop_nesting( knl, must_nest=("k", "{g, h, i, j}"), ) - knl_linearized = lp.get_one_linearized_kernel(lp.preprocess_kernel(knl)) - assert loop_order(knl_linearized.linearization)[0] == "k" + lin_knl = lp.get_one_linearized_kernel(lp.preprocess_kernel(knl)) + assert loop_order(lin_knl.linearization)[0] == "k" knl = ref_knl knl = lp.constrain_loop_nesting( knl, must_nest=("{g, h, i, j}", "k"), ) - knl_linearized = lp.get_one_linearized_kernel(lp.preprocess_kernel(knl)) - assert loop_order(knl_linearized.linearization)[-1] == "k" + lin_knl = lp.get_one_linearized_kernel(lp.preprocess_kernel(knl)) + assert loop_order(lin_knl.linearization)[-1] == "k" knl = ref_knl knl = lp.constrain_loop_nesting( knl, must_nest=("{g, h, i}", "{j, k}"), ) - knl_linearized = lp.get_one_linearized_kernel(lp.preprocess_kernel(knl)) - assert set(loop_order(knl_linearized.linearization)[-2:]) == set(["j", "k"]) + lin_knl = lp.get_one_linearized_kernel(lp.preprocess_kernel(knl)) + assert set(loop_order(lin_knl.linearization)[-2:]) == set(["j", "k"]) knl = ref_knl knl = lp.constrain_loop_nesting( @@ -534,20 +534,20 @@ def 
loop_order(linearization_items): knl, must_nest=("i", "{g, h}"), ) - knl_linearized = lp.get_one_linearized_kernel(lp.preprocess_kernel(knl)) - assert set(loop_order(knl_linearized.linearization)[3:]) == set(["j", "k"]) - assert set(loop_order(knl_linearized.linearization)[1:3]) == set(["g", "h"]) - assert loop_order(knl_linearized.linearization)[0] == "i" + lin_knl = lp.get_one_linearized_kernel(lp.preprocess_kernel(knl)) + assert set(loop_order(lin_knl.linearization)[3:]) == set(["j", "k"]) + assert set(loop_order(lin_knl.linearization)[1:3]) == set(["g", "h"]) + assert loop_order(lin_knl.linearization)[0] == "i" knl = ref_knl knl = lp.constrain_loop_nesting( knl, must_nest=("i", "{g, h}", "{j, k}"), ) - knl_linearized = lp.get_one_linearized_kernel(lp.preprocess_kernel(knl)) - assert set(loop_order(knl_linearized.linearization)[3:]) == set(["j", "k"]) - assert set(loop_order(knl_linearized.linearization)[1:3]) == set(["g", "h"]) - assert loop_order(knl_linearized.linearization)[0] == "i" + lin_knl = lp.get_one_linearized_kernel(lp.preprocess_kernel(knl)) + assert set(loop_order(lin_knl.linearization)[3:]) == set(["j", "k"]) + assert set(loop_order(lin_knl.linearization)[1:3]) == set(["g", "h"]) + assert loop_order(lin_knl.linearization)[0] == "i" # must_not_nest constraints @@ -556,24 +556,24 @@ def loop_order(linearization_items): knl, must_not_nest=("~k", "k"), ) - knl_linearized = lp.get_one_linearized_kernel(lp.preprocess_kernel(knl)) - assert loop_order(knl_linearized.linearization)[0] == "k" + lin_knl = lp.get_one_linearized_kernel(lp.preprocess_kernel(knl)) + assert loop_order(lin_knl.linearization)[0] == "k" knl = ref_knl knl = lp.constrain_loop_nesting( knl, must_not_nest=("k", "~k"), ) - knl_linearized = lp.get_one_linearized_kernel(lp.preprocess_kernel(knl)) - assert loop_order(knl_linearized.linearization)[-1] == "k" + lin_knl = lp.get_one_linearized_kernel(lp.preprocess_kernel(knl)) + assert loop_order(lin_knl.linearization)[-1] == "k" knl = ref_knl 
knl = lp.constrain_loop_nesting( knl, must_not_nest=("{j, k}", "~{j, k}"), ) - knl_linearized = lp.get_one_linearized_kernel(lp.preprocess_kernel(knl)) - assert set(loop_order(knl_linearized.linearization)[-2:]) == set(["j", "k"]) + lin_knl = lp.get_one_linearized_kernel(lp.preprocess_kernel(knl)) + assert set(loop_order(lin_knl.linearization)[-2:]) == set(["j", "k"]) knl = ref_knl knl = lp.constrain_loop_nesting( @@ -584,10 +584,10 @@ def loop_order(linearization_items): knl, must_nest=("i", "{g, h}"), ) - knl_linearized = lp.get_one_linearized_kernel(lp.preprocess_kernel(knl)) - assert set(loop_order(knl_linearized.linearization)[3:]) == set(["j", "k"]) - assert set(loop_order(knl_linearized.linearization)[1:3]) == set(["g", "h"]) - assert loop_order(knl_linearized.linearization)[0] == "i" + lin_knl = lp.get_one_linearized_kernel(lp.preprocess_kernel(knl)) + assert set(loop_order(lin_knl.linearization)[3:]) == set(["j", "k"]) + assert set(loop_order(lin_knl.linearization)[1:3]) == set(["g", "h"]) + assert loop_order(lin_knl.linearization)[0] == "i" # must_nest + must_not_nest knl = ref_knl @@ -596,18 +596,18 @@ def loop_order(linearization_items): must_nest=("{g, h, i}", "{j, k}"), must_not_nest=("i", "{g, h}"), ) - knl_linearized = lp.get_one_linearized_kernel(lp.preprocess_kernel(knl)) - assert set(loop_order(knl_linearized.linearization)[3:]) == set(["j", "k"]) - assert set(loop_order(knl_linearized.linearization)[0:2]) == set(["g", "h"]) - assert loop_order(knl_linearized.linearization)[2] == "i" + lin_knl = lp.get_one_linearized_kernel(lp.preprocess_kernel(knl)) + assert set(loop_order(lin_knl.linearization)[3:]) == set(["j", "k"]) + assert set(loop_order(lin_knl.linearization)[0:2]) == set(["g", "h"]) + assert loop_order(lin_knl.linearization)[2] == "i" knl = ref_knl knl = lp.constrain_loop_nesting( knl, must_not_nest=("i", "~i"), ) - knl_linearized = lp.get_one_linearized_kernel(lp.preprocess_kernel(knl)) - assert 
loop_order(knl_linearized.linearization)[-1] == "i" + lin_knl = lp.get_one_linearized_kernel(lp.preprocess_kernel(knl)) + assert loop_order(lin_knl.linearization)[-1] == "i" # contradictory must_not_nest @@ -642,9 +642,9 @@ def test_constraint_updating_split_iname(): from loopy.transform.iname import get_iname_nestings def linearize_and_get_nestings(unlinearized_knl): - knl_linearized = lp.get_one_linearized_kernel( + lin_knl = lp.get_one_linearized_kernel( lp.preprocess_kernel(unlinearized_knl)) - return get_iname_nestings(knl_linearized.linearization) + return get_iname_nestings(lin_knl.linearization) ref_knl = lp.make_kernel( "{ [g,h,i,j,k]: 0<=g,h,i,j,k Date: Tue, 27 Apr 2021 03:06:54 -0500 Subject: [PATCH 361/460] rename variable coalesce_duplicate_replacement_inames->coalesce_new_iname_duplicates --- loopy/transform/iname.py | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/loopy/transform/iname.py b/loopy/transform/iname.py index 4a38e73ac..6767955c5 100644 --- a/loopy/transform/iname.py +++ b/loopy/transform/iname.py @@ -734,7 +734,7 @@ def get_graph_sources(graph): def replace_inames_in_nest_constraints( inames_to_replace, replacement_inames, old_constraints, - coalesce_duplicate_replacement_inames=False, + coalesce_new_iname_duplicates=False, keep_old_inames=False, ): """ @@ -786,7 +786,7 @@ def replace_inames_in_nest_constraints( # [{i, k}, {j, h}], at this point we have [{ij, k}, {ij, h}] # which contains a cycle. If coalescing is enabled, change this # to [{k}, ij, {h}] to remove the cycle. 
- if coalesce_duplicate_replacement_inames: + if coalesce_new_iname_duplicates: def coalesce_duplicate_inames_in_nesting(nesting, coalesce_candidates): # TODO would like this to be fully generic, but for now, assumes @@ -926,13 +926,12 @@ def replace_inames_in_graph( def replace_inames_in_all_nest_constraints( kernel, old_inames, new_inames, - coalesce_duplicate_replacement_inames=False, + coalesce_new_iname_duplicates=False, pairs_that_must_not_voilate_constraints=set(), keep_old_inames=False, ): # replace each iname in old_inames with all inames in new_inames # TODO What was pairs_that_must_not_voilate_constraints used for??? - # TODO handle case where we want to keep old inames around # get old must_nest and must_not_nest # (must_nest_graph will be rebuilt) @@ -956,7 +955,7 @@ def replace_inames_in_all_nest_constraints( new_must_nest = replace_inames_in_nest_constraints( old_inames, new_inames, old_must_nest, - coalesce_duplicate_replacement_inames=coalesce_duplicate_replacement_inames, + coalesce_new_iname_duplicates=coalesce_new_iname_duplicates, keep_old_inames=keep_old_inames, ) else: @@ -973,12 +972,12 @@ def replace_inames_in_all_nest_constraints( new_must_not_nest = replace_inames_in_nest_constraints( old_inames, new_inames, old_must_not_nest, - coalesce_duplicate_replacement_inames=False, + coalesce_new_iname_duplicates=False, # (for now, never coalesce must-not-nest constraints) keep_old_inames=keep_old_inames, ) # each must not nest constraint may only contain two tiers - # TODO coalesce_duplicate_replacement_inames? + # TODO coalesce_new_iname_duplicates? 
else: new_must_not_nest = None @@ -2287,7 +2286,7 @@ def _remove_iname_from_dep(dep): kernel = replace_inames_in_all_nest_constraints( kernel, old_inames=unused_inames, new_inames=[], - coalesce_duplicate_replacement_inames=False, + coalesce_new_iname_duplicates=False, pairs_that_must_not_voilate_constraints=set(), keep_old_inames=False, ) From f213da89b0fc0a27711f5fbfcdddb2172566b5ce Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Tue, 27 Apr 2021 03:28:03 -0500 Subject: [PATCH 362/460] fix flake8 issue --- test/test_nest_constraints.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/test_nest_constraints.py b/test/test_nest_constraints.py index 1d7156843..8fd84bdbe 100644 --- a/test/test_nest_constraints.py +++ b/test/test_nest_constraints.py @@ -712,10 +712,10 @@ def linearize_and_get_nestings(unlinearized_knl): ref_knl = lp.make_kernel( "{ [g,h,i,j,k]: 0<=g,h,i,j,k Date: Tue, 27 Apr 2021 05:56:11 -0500 Subject: [PATCH 363/460] eliminate unnecessary keep_old_inames option when replacing inames in nest constraints (can just include old inames in the replacement inames to accomplish same result) --- loopy/transform/iname.py | 27 ++++++++++----------------- 1 file changed, 10 insertions(+), 17 deletions(-) diff --git a/loopy/transform/iname.py b/loopy/transform/iname.py index 6767955c5..9296e733e 100644 --- a/loopy/transform/iname.py +++ b/loopy/transform/iname.py @@ -735,7 +735,6 @@ def get_graph_sources(graph): def replace_inames_in_nest_constraints( inames_to_replace, replacement_inames, old_constraints, coalesce_new_iname_duplicates=False, - keep_old_inames=False, ): """ :arg inames_to_replace: A set of inames that may exist in @@ -764,11 +763,7 @@ def replace_inames_in_nest_constraints( # create the new set of inames with the replacements if inames_found: - if keep_old_inames: - # TODO is copy necessary? 
- new_inames = iname_set.inames.copy() - else: - new_inames = iname_set.inames - inames_found + new_inames = iname_set.inames - inames_found new_inames.update(replacement_inames) else: new_inames = iname_set.inames.copy() @@ -928,7 +923,6 @@ def replace_inames_in_all_nest_constraints( kernel, old_inames, new_inames, coalesce_new_iname_duplicates=False, pairs_that_must_not_voilate_constraints=set(), - keep_old_inames=False, ): # replace each iname in old_inames with all inames in new_inames # TODO What was pairs_that_must_not_voilate_constraints used for??? @@ -956,7 +950,6 @@ def replace_inames_in_all_nest_constraints( new_must_nest = replace_inames_in_nest_constraints( old_inames, new_inames, old_must_nest, coalesce_new_iname_duplicates=coalesce_new_iname_duplicates, - keep_old_inames=keep_old_inames, ) else: new_must_nest = None @@ -974,7 +967,6 @@ def replace_inames_in_all_nest_constraints( old_inames, new_inames, old_must_not_nest, coalesce_new_iname_duplicates=False, # (for now, never coalesce must-not-nest constraints) - keep_old_inames=keep_old_inames, ) # each must not nest constraint may only contain two tiers # TODO coalesce_new_iname_duplicates? 
@@ -984,11 +976,8 @@ def replace_inames_in_all_nest_constraints( # Rebuild must_nest graph if new_must_nest: new_must_nest_graph = {} - if keep_old_inames: - new_all_inames = kernel.all_inames() | set(new_inames) - else: - new_all_inames = ( - kernel.all_inames() - set(old_inames)) | set(new_inames) + new_all_inames = ( + kernel.all_inames() - set(old_inames)) | set(new_inames) from pytools.graph import CycleError for must_nest_tuple in new_must_nest: try: @@ -1291,8 +1280,8 @@ def _split_iname_in_dependee(dep): # update must_nest, must_not_nest, and must_nest_graph kernel = replace_inames_in_all_nest_constraints( - kernel, set([iname_to_split, ]), [inner_iname, outer_iname], - keep_old_inames=True, + kernel, + set([iname_to_split, ]), [iname_to_split, inner_iname, outer_iname], ) # }}} @@ -1891,6 +1880,11 @@ def duplicate_inames(kernel, inames, within, new_inames=None, suffix=None, from loopy.kernel.tools import DomainChanger domch = DomainChanger(kernel, frozenset([old_iname])) + # update must_nest, must_not_nest, and must_nest_graph + # (don't remove any unused inames yet, that happens later) + #knl = replace_inames_in_all_nest_constraints( + # knl, set([old_iname, ]), [old_iname, new_iname]) + from loopy.isl_helpers import duplicate_axes kernel = kernel.copy( domains=domch.get_domains_with( @@ -2288,7 +2282,6 @@ def _remove_iname_from_dep(dep): kernel, old_inames=unused_inames, new_inames=[], coalesce_new_iname_duplicates=False, pairs_that_must_not_voilate_constraints=set(), - keep_old_inames=False, ) # }}} From 4cc5175639e8d45e3459e2edb3c04f65b0e4fe44 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Tue, 27 Apr 2021 06:45:53 -0500 Subject: [PATCH 364/460] update loop nest constraints in duplicate_inames (and handle within cases) --- loopy/transform/iname.py | 24 +++++++++++++++++++----- 1 file changed, 19 insertions(+), 5 deletions(-) diff --git a/loopy/transform/iname.py b/loopy/transform/iname.py index 9296e733e..da72f2277 100644 --- a/loopy/transform/iname.py 
+++ b/loopy/transform/iname.py @@ -1880,10 +1880,13 @@ def duplicate_inames(kernel, inames, within, new_inames=None, suffix=None, from loopy.kernel.tools import DomainChanger domch = DomainChanger(kernel, frozenset([old_iname])) - # update must_nest, must_not_nest, and must_nest_graph + # # {{{ update nest constraints + # (don't remove any unused inames yet, that happens later) - #knl = replace_inames_in_all_nest_constraints( - # knl, set([old_iname, ]), [old_iname, new_iname]) + kernel = replace_inames_in_all_nest_constraints( + kernel, set([old_iname, ]), [old_iname, new_iname]) + + # }}} from loopy.isl_helpers import duplicate_axes kernel = kernel.copy( @@ -1948,6 +1951,18 @@ def _rename_iname_in_dep_in(dep): # }}} + # TODO why isn't remove_unused_inames called on kernel here? + + # {{{ if there are any now unused inames, remove from nest constraints + + now_unused_inames = (set(inames) - get_used_inames(kernel)) & set(inames) + kernel = replace_inames_in_all_nest_constraints( + kernel, old_inames=now_unused_inames, new_inames=[], + coalesce_new_iname_duplicates=False, + ) + + # }}} + return kernel # }}} @@ -2276,12 +2291,11 @@ def _remove_iname_from_dep(dep): # }}} - # # {{{ Remove inames from loop nest constraints + # {{{ Remove inames from loop nest constraints kernel = replace_inames_in_all_nest_constraints( kernel, old_inames=unused_inames, new_inames=[], coalesce_new_iname_duplicates=False, - pairs_that_must_not_voilate_constraints=set(), ) # }}} From 725346304f2c16c08dcb59e587f6c0aeba8006d3 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Tue, 27 Apr 2021 06:46:09 -0500 Subject: [PATCH 365/460] add test for nest constraint updating in duplciate_inames --- test/test_nest_constraints.py | 124 ++++++++++++++++++++++++++++++---- 1 file changed, 112 insertions(+), 12 deletions(-) diff --git a/test/test_nest_constraints.py b/test/test_nest_constraints.py index 8fd84bdbe..00cff0034 100644 --- a/test/test_nest_constraints.py +++ b/test/test_nest_constraints.py @@ 
-635,16 +635,21 @@ def loop_order(lin_items): # {{{ test constraint updating during transformation -# {{{ test_constraint_updating_split_iname -def test_constraint_updating_split_iname(): +# {{{ helper functions +def _linearize_and_get_nestings(unlinearized_knl): from loopy.transform.iname import get_iname_nestings + lin_knl = lp.get_one_linearized_kernel( + lp.preprocess_kernel(unlinearized_knl)) + return get_iname_nestings(lin_knl.linearization) + +# }}} + + +# {{{ test_constraint_updating_split_iname - def linearize_and_get_nestings(unlinearized_knl): - lin_knl = lp.get_one_linearized_kernel( - lp.preprocess_kernel(unlinearized_knl)) - return get_iname_nestings(lin_knl.linearization) +def test_constraint_updating_split_iname(): ref_knl = lp.make_kernel( "{ [g,h,i,j,k]: 0<=g,h,i,j,k Date: Tue, 27 Apr 2021 07:01:55 -0500 Subject: [PATCH 366/460] raise NotImplementedError in rename_iname() when new_iname does exist --- loopy/transform/iname.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/loopy/transform/iname.py b/loopy/transform/iname.py index da72f2277..60439428e 100644 --- a/loopy/transform/iname.py +++ b/loopy/transform/iname.py @@ -2144,6 +2144,16 @@ def rename_iname(kernel, old_iname, new_iname, existing_ok=False, within=None): "--cannot rename" % new_iname) if does_exist: + + # TODO implement this + if kernel.loop_nest_constraints and ( + kernel.loop_nest_constraints.must_nest or + kernel.loop_nest_constraints.must_not_nest or + kernel.loop_nest_constraints.must_nest_graph): + raise NotImplementedError( + "rename_iname() does not yet handle new loop nest " + "constraints when does_exist=True.") + # {{{ check that the domains match up dom = kernel.get_inames_domain(frozenset((old_iname, new_iname))) From 2f4297a026de95d09af3ac95354c7b5ffe83be8a Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Tue, 27 Apr 2021 07:02:11 -0500 Subject: [PATCH 367/460] add test for constraitn updating in rename_iname --- test/test_nest_constraints.py | 39 
++++++++++++++++++++++++++++++++++- 1 file changed, 38 insertions(+), 1 deletion(-) diff --git a/test/test_nest_constraints.py b/test/test_nest_constraints.py index 00cff0034..38ce3cd3b 100644 --- a/test/test_nest_constraints.py +++ b/test/test_nest_constraints.py @@ -748,7 +748,7 @@ def test_constraint_updating_split_iname(): # }}} -# {{{ +# {{{ test_constraint_updating_duplicate_inames def test_constraint_updating_duplicate_inames(): @@ -842,6 +842,43 @@ def test_constraint_updating_duplicate_inames(): # }}} + +# {{{ test_constraint_updating_rename_iname + +def test_constraint_updating_rename_iname(): + + ref_knl = lp.make_kernel( + "{ [g,h,i,j,k]: 0<=g,h,i,j,k Date: Tue, 27 Apr 2021 07:18:23 -0500 Subject: [PATCH 368/460] Don't allow tagging of iname found in must_nest constraint as concurrent --- loopy/transform/iname.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/loopy/transform/iname.py b/loopy/transform/iname.py index 60439428e..d0beb85a8 100644 --- a/loopy/transform/iname.py +++ b/loopy/transform/iname.py @@ -1756,6 +1756,7 @@ def parse_tag(tag): # }}} + from loopy.kernel.data import ConcurrentTag knl_inames = kernel.inames.copy() for name, new_tag in iname_to_tag.items(): if not new_tag: @@ -1766,6 +1767,21 @@ def parse_tag(tag): knl_inames[name] = knl_inames[name].tagged(new_tag) + # {{{ Don't allow tagging of must_nest iname as concurrent + # TODO ...but what about 'vec'? + + if isinstance(new_tag, ConcurrentTag) and kernel.loop_nest_constraints: + must_nest = kernel.loop_nest_constraints.must_nest + if must_nest: + for nesting in must_nest: + for iname_set in nesting: + if iname in iname_set.inames: + raise ValueError("cannot tag '%s' as concurrent--" + "iname involved in must-nest constraint %s." 
+ % (iname, nesting)) + + # }}} + return kernel.copy(inames=knl_inames) # }}} From 96d07d5a63a0d1a5e1c40b861e2cbcdc4726e2be Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Tue, 27 Apr 2021 07:18:46 -0500 Subject: [PATCH 369/460] test error when tagging iname found in must_nest constraint as concurrent --- test/test_nest_constraints.py | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/test/test_nest_constraints.py b/test/test_nest_constraints.py index 38ce3cd3b..929ddf331 100644 --- a/test/test_nest_constraints.py +++ b/test/test_nest_constraints.py @@ -879,6 +879,39 @@ def test_constraint_updating_rename_iname(): # }}} + +# {{{ test_constraint_handling_tag_inames + +def test_constraint_handling_tag_inames(): + + ref_knl = lp.make_kernel( + "{ [g,h,i,j,k]: 0<=g,h,i,j,k Date: Tue, 27 Apr 2021 07:45:18 -0500 Subject: [PATCH 370/460] handle constraint updating in join_inames (when within=None) --- loopy/transform/iname.py | 32 +++++++++++++++++++++++++++++++- 1 file changed, 31 insertions(+), 1 deletion(-) diff --git a/loopy/transform/iname.py b/loopy/transform/iname.py index d0beb85a8..0bb19532d 100644 --- a/loopy/transform/iname.py +++ b/loopy/transform/iname.py @@ -925,7 +925,6 @@ def replace_inames_in_all_nest_constraints( pairs_that_must_not_voilate_constraints=set(), ): # replace each iname in old_inames with all inames in new_inames - # TODO What was pairs_that_must_not_voilate_constraints used for??? 
# get old must_nest and must_not_nest # (must_nest_graph will be rebuilt) @@ -1609,6 +1608,37 @@ def subst_within_inames(fid): applied_iname_rewrites=kernel.applied_iname_rewrites + [subst_dict] )) + # {{{ update must_nest, must_not_nest, and must_nest_graph + + if kernel.loop_nest_constraints and ( + kernel.loop_nest_constraints.must_nest or + kernel.loop_nest_constraints.must_not_nest or + kernel.loop_nest_constraints.must_nest_graph): + + if within != parse_match(None): + raise NotImplementedError( + "join_inames() does not yet handle new loop nest " + "constraints when within is not None.") + + # When joining inames, we create several implied loop nestings. + # make sure that these implied nestings don't violate existing + # constraints. + + # (will fail if cycle is created in must-nest graph) + implied_nestings = set() + inames_orig_order = inames[::-1] # this was reversed above + for i, iname_before in enumerate(inames_orig_order[:-1]): + for iname_after in inames_orig_order[i+1:]: + implied_nestings.add((iname_before, iname_after)) + + kernel = replace_inames_in_all_nest_constraints( + kernel, set(inames), [new_iname], + coalesce_new_iname_duplicates=True, + pairs_that_must_not_voilate_constraints=implied_nestings, + ) + + # }}} + from loopy.match import parse_stack_match within = parse_stack_match(within) From 7ec404f243322d3386822ac97c1911c3fafbdd2f Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Tue, 27 Apr 2021 07:45:37 -0500 Subject: [PATCH 371/460] test constraint handling in join_inames() --- test/test_nest_constraints.py | 223 ++++++++++++++++++++++++++++++++++ 1 file changed, 223 insertions(+) diff --git a/test/test_nest_constraints.py b/test/test_nest_constraints.py index 929ddf331..40e52420a 100644 --- a/test/test_nest_constraints.py +++ b/test/test_nest_constraints.py @@ -912,6 +912,229 @@ def test_constraint_handling_tag_inames(): # }}} + +# {{{ test_constraint_updating_join_inames + +def test_constraint_updating_join_inames(): + + ref_knl = 
lp.make_kernel( + "{ [g,h,i,j,k]: 0<=g,h,i,j,k<1024 }", + """ + out[g,h,i,j,k] = 2*a[g,h,i,j,k] {id=insn} + """, + ) + ref_knl = lp.add_and_infer_dtypes(ref_knl, {"a": np.dtype(np.float32)}) + + knl = ref_knl + knl = lp.constrain_loop_nesting( + knl, + must_nest=("i", "{g, h, j, k}"), + must_not_nest=("h", "g"), + ) + knl = lp.constrain_loop_nesting( + knl, + must_nest=("{g, h, i}", "{j, k}"), + ) + knl = lp.join_inames(knl, inames=["g", "h"], new_iname="gh") + loop_nesting = _linearize_and_get_nestings(knl)[0] # only one nesting + assert loop_nesting[0] == "i" + assert loop_nesting[1] == "gh" + assert set(loop_nesting[2:]) == set(["j", "k"]) + + knl = ref_knl + knl = lp.constrain_loop_nesting( + knl, + must_nest=("i", "{g, h, j, k}"), + must_not_nest=("h", "g"), + ) + knl = lp.constrain_loop_nesting( + knl, + must_nest=("{g, h, i}", "{j, k}"), + ) + knl = lp.join_inames(knl, inames=["j", "k"], new_iname="jk") + loop_nesting = _linearize_and_get_nestings(knl)[0] # only one nesting + assert loop_nesting[0] == "i" + assert loop_nesting[1:3] == ("g", "h") + assert loop_nesting[3] == "jk" + + knl = ref_knl + knl = lp.constrain_loop_nesting( + knl, + must_nest=("h", "i", "g", "{j, k}"), + ) + knl = lp.join_inames(knl, inames=["i", "g"], new_iname="ig") + loop_nesting = _linearize_and_get_nestings(knl)[0] # only one nesting + assert loop_nesting[0] == "h" + assert loop_nesting[1] == "ig" + assert set(loop_nesting[2:4]) == set(["j", "k"]) + + # test cycle detection + knl = ref_knl + knl = lp.constrain_loop_nesting( + knl, + must_nest=("i", "{g, h}", "{j, k}"), + ) + try: + lp.join_inames(knl, inames=["i", "k"], new_iname="ik") + assert False + except ValueError as e: + assert "cycle" in str(e) + + # test implied nesting that creates constraint violation + knl = ref_knl + knl = lp.constrain_loop_nesting( + knl, + must_not_nest=("i", "k"), + ) + try: + lp.join_inames(knl, inames=["i", "k"], new_iname="ik") + assert False + except ValueError as e: + assert "Implied nestings 
violate existing must-not-nest" in str(e) + +# }}} + + +# {{{ test_iname_coalescing_in_loop_nest_constraints + +def test_iname_coalescing_in_loop_nest_constraints(): + + def get_sets_of_inames(iname_sets_tuple, iname_universe): + # convert UnexpandedInameSets to sets + sets_of_inames = [] + for iname_set in iname_sets_tuple: + sets_of_inames.append( + iname_set.get_inames_represented(iname_universe)) + return sets_of_inames + + ref_knl = lp.make_kernel( + "{ [g,h,i,j,k]: 0<=g,h,i,j,k<1024 }", + """ + out[g,h,i,j,k] = 2*a[g,h,i,j,k] {id=insn} + """, + ) + # (join_inames errors if domain bound is variable) + + ref_knl = lp.add_and_infer_dtypes(ref_knl, {"a": np.dtype(np.float32)}) + + knl = ref_knl + knl = lp.constrain_loop_nesting( + knl, + must_nest=("i", "g", "h", "j", "k"), + ) + knl = lp.join_inames(knl, inames=["g", "h"], new_iname="gh") + new_must_nest = get_sets_of_inames( + list(knl.loop_nest_constraints.must_nest)[0], knl.all_inames()) + expected_must_nest = [ + set(["i", ]), set(["gh", ]), set(["j", ]), set(["k", ])] + assert new_must_nest == expected_must_nest + + knl = ref_knl + knl = lp.constrain_loop_nesting( + knl, + must_nest=("{i, g}", "h", "j", "k"), + ) + knl = lp.join_inames(knl, inames=["g", "h"], new_iname="gh") + new_must_nest = get_sets_of_inames( + list(knl.loop_nest_constraints.must_nest)[0], knl.all_inames()) + expected_must_nest = [ + set(["i", ]), set(["gh", ]), set(["j", ]), set(["k", ])] + assert new_must_nest == expected_must_nest + + knl = ref_knl + knl = lp.constrain_loop_nesting( + knl, + must_nest=("i", "g", "{h, j}", "k"), + ) + knl = lp.join_inames(knl, inames=["g", "h"], new_iname="gh") + new_must_nest = get_sets_of_inames( + list(knl.loop_nest_constraints.must_nest)[0], knl.all_inames()) + expected_must_nest = [ + set(["i", ]), set(["gh", ]), set(["j", ]), set(["k", ])] + assert new_must_nest == expected_must_nest + + knl = ref_knl + knl = lp.constrain_loop_nesting( + knl, + must_nest=("i", "g", "{h, j, k}"), + ) + knl = 
lp.join_inames(knl, inames=["g", "h"], new_iname="gh") + new_must_nest = get_sets_of_inames( + list(knl.loop_nest_constraints.must_nest)[0], knl.all_inames()) + expected_must_nest = [ + set(["i", ]), set(["gh", ]), set(["j", "k"])] + assert new_must_nest == expected_must_nest + + knl = ref_knl + knl = lp.constrain_loop_nesting( + knl, + must_nest=("i", "{g, h}", "j", "k"), + ) + knl = lp.join_inames(knl, inames=["g", "h"], new_iname="gh") + new_must_nest = get_sets_of_inames( + list(knl.loop_nest_constraints.must_nest)[0], knl.all_inames()) + expected_must_nest = [ + set(["i", ]), set(["gh", ]), set(["j", ]), set(["k", ])] + assert new_must_nest == expected_must_nest + + knl = ref_knl + knl = lp.constrain_loop_nesting( + knl, + must_nest=("{i, g}", "{h, j, k}"), + ) + knl = lp.join_inames(knl, inames=["g", "h"], new_iname="gh") + new_must_nest = get_sets_of_inames( + list(knl.loop_nest_constraints.must_nest)[0], knl.all_inames()) + expected_must_nest = [ + set(["i", ]), set(["gh", ]), set(["j", "k"])] + assert new_must_nest == expected_must_nest + + knl = ref_knl + knl = lp.constrain_loop_nesting( + knl, + must_nest=("i", "g", "j", "h", "k"), + ) + try: + knl = lp.join_inames(knl, inames=["g", "h"], new_iname="gh") + assert False + except ValueError as e: + assert "contains cycle" in str(e) + + knl = ref_knl + knl = lp.constrain_loop_nesting( + knl, + must_nest=("{i, g}", "j", "{h, k}"), + ) + try: + knl = lp.join_inames(knl, inames=["g", "h"], new_iname="gh") + assert False + except ValueError as e: + assert "contains cycle" in str(e) + + knl = ref_knl + knl = lp.constrain_loop_nesting( + knl, + must_nest=("{i, h}", "j", "{g, k}"), + ) + try: + knl = lp.join_inames(knl, inames=["g", "h"], new_iname="gh") + assert False + except ValueError as e: + assert "nestings violate existing must-nest" in str(e) + + knl = ref_knl + knl = lp.constrain_loop_nesting( + knl, + must_not_nest=("g", "h"), + ) + try: + knl = lp.join_inames(knl, inames=["g", "h"], new_iname="gh") + 
assert False + except ValueError as e: + assert "nestings violate existing must-not-nest" in str(e) + +# }}} + # TODO make standalone test for constraint updating functions that # doesn't bother with transforms/linearization From 65cdfb3c055a5ca6413bc7102e317e654bdb50e6 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Tue, 27 Apr 2021 07:51:58 -0500 Subject: [PATCH 372/460] when inames are tagged as vec, see if there is a must-nest constraint that conflicts with them being nested innermost --- loopy/transform/iname.py | 23 +++++++++++++++++++---- 1 file changed, 19 insertions(+), 4 deletions(-) diff --git a/loopy/transform/iname.py b/loopy/transform/iname.py index 0bb19532d..78abb0702 100644 --- a/loopy/transform/iname.py +++ b/loopy/transform/iname.py @@ -1786,7 +1786,7 @@ def parse_tag(tag): # }}} - from loopy.kernel.data import ConcurrentTag + from loopy.kernel.data import ConcurrentTag, VectorizeTag knl_inames = kernel.inames.copy() for name, new_tag in iname_to_tag.items(): if not new_tag: @@ -1797,10 +1797,24 @@ def parse_tag(tag): knl_inames[name] = knl_inames[name].tagged(new_tag) - # {{{ Don't allow tagging of must_nest iname as concurrent - # TODO ...but what about 'vec'? + # {{{ loop nest constraint handling - if isinstance(new_tag, ConcurrentTag) and kernel.loop_nest_constraints: + if isinstance(new_tag, VectorizeTag): + # {{{ vec_inames will be nested innermost, check whether this + # conflicts with must-nest constraints + must_nest_graph = (kernel.loop_nest_constraints.must_nest_graph + if kernel.loop_nest_constraints else None) + if must_nest_graph and must_nest_graph.get(iname, set()): + # iname is not a leaf + raise ValueError( + "Loop priorities provided specify that iname %s nest " + "outside of inames %s, but vectorized inames " + "must nest innermost. Cannot tag %s with 'vec' tag." 
+ % (iname, must_nest_graph.get(iname, set()), iname)) + # }}} + + elif isinstance(new_tag, ConcurrentTag) and kernel.loop_nest_constraints: + # {{{ Don't allow tagging of must_nest iname as concurrent must_nest = kernel.loop_nest_constraints.must_nest if must_nest: for nesting in must_nest: @@ -1809,6 +1823,7 @@ def parse_tag(tag): raise ValueError("cannot tag '%s' as concurrent--" "iname involved in must-nest constraint %s." % (iname, nesting)) + # }}} # }}} From ff482538e17cc2dd89d6da7b82d19d8efcd3014c Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Wed, 5 May 2021 20:18:36 -0500 Subject: [PATCH 373/460] fix bug in map_domain where we attempt to return the same kernel if no kernel matches within --- loopy/transform/iname.py | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/loopy/transform/iname.py b/loopy/transform/iname.py index 29a27f074..1e216509f 100644 --- a/loopy/transform/iname.py +++ b/loopy/transform/iname.py @@ -1909,15 +1909,9 @@ def map_domain(kernel, isl_map, within=None, rename_after={}): # {{{ return the same kernel if no kernel matches - def _do_not_transform_if_no_within_matches(): - for insn in kernel.instructions: - if within(kernel, insn): - return - + if not any(within(kernel, insn) for insn in kernel.instructions): return kernel - _do_not_transform_if_no_within_matches() - # }}} # }}} From 0cf937d242fd200b66f8d668d69876e6c4fcd1c8 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Sat, 8 May 2021 22:42:04 -0500 Subject: [PATCH 374/460] clean up nest constraint code and add more comments/documentation --- loopy/transform/iname.py | 315 ++++++++++++++++++++++------------ test/test_nest_constraints.py | 17 +- 2 files changed, 207 insertions(+), 125 deletions(-) diff --git a/loopy/transform/iname.py b/loopy/transform/iname.py index 6504c2ff0..a5f6176f3 100644 --- a/loopy/transform/iname.py +++ b/loopy/transform/iname.py @@ -113,9 +113,9 @@ def prioritize_loops(kernel, loop_priority): # }}} -# {{{ handle loop nest constraints +# 
{{{ Handle loop nest constraints -# {{{ classes to house loop nest constraints +# {{{ Classes to house loop nest constraints # {{{ UnexpandedInameSet @@ -127,13 +127,13 @@ def __init__(self, inames, complement=False): complement=complement, ) - def contains(self, iname): - return (iname not in self.inames if self.complement - else iname in self.inames) - - def contains_all(self, iname_set): - return (not (iname_set & self.inames) if self.complement - else iname_set.issubset(self.inames)) + def contains(self, inames): + if isinstance(inames, set): + return (not (iname_set & self.inames) if self.complement + else iname_set.issubset(self.inames)) + else: + return (inames not in self.inames if self.complement + else inames in self.inames) def get_inames_represented(self, iname_universe=None): """Return the set of inames represented by the UnexpandedInameSet @@ -148,7 +148,7 @@ def get_inames_represented(self, iname_universe=None): return self.inames.copy() def __lt__(self, other): - # TODO is this function really necessary? If so, what should it return? + # FIXME is this function really necessary? If so, what should it return? return self.__hash__() < other.__hash__() def __hash__(self): @@ -205,7 +205,7 @@ def __str__(self): # }}} -# {{{ initial loop nest constraint creation +# {{{ Initial loop nest constraint creation # {{{ process_loop_nest_specification @@ -214,19 +214,20 @@ def process_loop_nest_specification( max_tuple_size=None, complement_sets_allowed=True, ): - # make sure user-supplied nesting conforms to rules + + # Ensure that user-supplied nesting conforms to syntax rules, and # convert string representations of nestings to tuple of UnexpandedInameSets import re - def raise_loop_nest_input_error(msg): + def _raise_loop_nest_input_error(msg): valid_prio_rules = ( "Valid `must_nest` description formats: " # noqa "\"iname, iname, ...\" or (str, str, str, ...), " # noqa "where str can be of form " # noqa "\"iname\" or \"{iname, iname, ...}\". 
" # noqa "No set complements allowed.\n" # noqa - "Valid `must_not_nest` description tuples must have len <= 2: " # noqa + "Valid `must_not_nest` description tuples must have length 2: " # noqa "\"iname, iname\", \"iname, ~iname\", or " # noqa "(str, str), where str can be of form " # noqa "\"iname\", \"~iname\", \"{iname, iname, ...}\", or " # noqa @@ -239,25 +240,26 @@ def raise_loop_nest_input_error(msg): def _error_on_regex_match(match_str, target_str): if re.findall(match_str, target_str): - raise_loop_nest_input_error( + _raise_loop_nest_input_error( "Unrecognized character(s) %s in nest string %s" % (re.findall(match_str, target_str), target_str)) def _process_iname_set_str(iname_set_str): - # convert something like ~{i,j} or ~i or "i,j" to an UnexpandedInameSet + # Convert something like ~{i,j} or ~i or "i,j" to an UnexpandedInameSet - # remove leading/trailing whitespace + # Remove leading/trailing whitespace iname_set_str_stripped = iname_set_str.strip() if not iname_set_str_stripped: - raise_loop_nest_input_error( + _raise_loop_nest_input_error( "Found 0 inames in string %s." % (iname_set_str)) + # Process complement sets if iname_set_str_stripped[0] == "~": # Make sure compelement is allowed if not complement_sets_allowed: - raise_loop_nest_input_error( + _raise_loop_nest_input_error( "Complement (~) not allowed in this loop nest string %s. " "If you have a use-case where allowing a currently " "disallowed set complement would be helpful, and the " @@ -266,10 +268,10 @@ def _process_iname_set_str(iname_set_str): "please contact the Loo.py maintainers." 
% (iname_set_str)) - # remove tilde + # Remove tilde iname_set_str_stripped = iname_set_str_stripped[1:] if "~" in iname_set_str_stripped: - raise_loop_nest_input_error( + _raise_loop_nest_input_error( "Multiple complement symbols found in iname set string %s" % (iname_set_str)) @@ -277,7 +279,7 @@ def _process_iname_set_str(iname_set_str): if "," in iname_set_str_stripped and not ( iname_set_str_stripped.startswith("{") and iname_set_str_stripped.endswith("}")): - raise_loop_nest_input_error( + _raise_loop_nest_input_error( "Complements of sets containing multiple inames must " "enclose inames in braces: %s is not valid." % (iname_set_str)) @@ -286,47 +288,49 @@ def _process_iname_set_str(iname_set_str): else: complement = False - # remove leading/trailing spaces + # Remove leading/trailing spaces iname_set_str_stripped = iname_set_str_stripped.strip(" ") - # make sure braces are valid and strip them + # Make sure braces are valid and strip them if iname_set_str_stripped[0] == "{": if not iname_set_str_stripped[-1] == "}": - raise_loop_nest_input_error( + _raise_loop_nest_input_error( "Invalid braces: %s" % (iname_set_str)) else: - # remove enclosing braces + # Remove enclosing braces iname_set_str_stripped = iname_set_str_stripped[1:-1] - # if there are dangling braces around, they will be caught next + # (If there are dangling braces around, they will be caught next) - # remove any more spaces + # Remove any more spaces iname_set_str_stripped = iname_set_str_stripped.strip() - # should be no remaining special characters besides comma and space + # Should be no remaining special characters besides comma and space _error_on_regex_match(r"([^,\w ])", iname_set_str_stripped) - # split by commas or spaces to get inames + # Split by commas or spaces to get inames inames = re.findall(r"([\w]+)(?:[ |,]*|$)", iname_set_str_stripped) - # make sure iname count matches what we expect from comma count + # Make sure iname count matches what we expect from comma count if 
len(inames) != iname_set_str_stripped.count(",") + 1: - raise_loop_nest_input_error( + _raise_loop_nest_input_error( "Found %d inames but expected %d in string %s." % (len(inames), iname_set_str_stripped.count(",") + 1, iname_set_str)) if len(inames) == 0: - raise_loop_nest_input_error( + _raise_loop_nest_input_error( "Found empty set in string %s." % (iname_set_str)) + # NOTE this won't catch certain cases of bad syntax, e.g., ("{h i j,,}", "k") + return UnexpandedInameSet( set([s.strip() for s in iname_set_str_stripped.split(",")]), complement=complement) if isinstance(nesting, str): - # Enforce that priorities involving iname sets be passed as tuple - # Iname sets defined negatively with a single iname are allowed here + # Enforce that constraints involving iname sets be passed as tuple. + # Iname sets defined negatively with a *single* iname are allowed here. # Check for any special characters besides comma, space, and tilde. # E.g., curly braces would indicate that an iname set was NOT @@ -337,23 +341,24 @@ def _process_iname_set_str(iname_set_str): nesting_as_tuple = tuple( _process_iname_set_str(set_str) for set_str in nesting.split(",")) else: - # nesting not passed as string; process each tier + assert isinstance(nesting, (tuple, list)) + # Process each tier nesting_as_tuple = tuple( _process_iname_set_str(set_str) for set_str in nesting) - # check max_inames_per_set + # Check max_inames_per_set if max_tuple_size and len(nesting_as_tuple) > max_tuple_size: - raise_loop_nest_input_error( + _raise_loop_nest_input_error( "Loop nest prioritization tuple %s exceeds max tuple size %d." % (nesting_as_tuple)) - # make sure nesting has len > 1 + # Make sure nesting has len > 1 if len(nesting_as_tuple) <= 1: - raise_loop_nest_input_error( + _raise_loop_nest_input_error( "Loop nest prioritization tuple %s must have length > 1." 
% (nesting_as_tuple)) - # return tuple of UnexpandedInameSets + # Return tuple of UnexpandedInameSets return nesting_as_tuple # }}} @@ -363,22 +368,33 @@ def _process_iname_set_str(iname_set_str): def constrain_loop_nesting( kernel, must_nest=None, must_not_nest=None): - # TODO docstring - # TODO what if someone passes single-iname prio? - # TODO enforce that must_nest be a single tuple not list of tuples - # (or update implementation to allow list of tuples) + """Add the provided constraints to the kernel. + + :arg must_nest: A tuple or comma-separated string representing + an ordering of loop nesting tiers that must appear in the + linearized kernel. Each item in the tuple represents a + :class:`UnexpandedInameSet`\ s. - # check for existing constraints + :arg must_not_nest: A two-tuple or comma-separated string representing + an ordering of loop nesting tiers that must not appear in the + linearized kernel. Each item in the tuple represents a + :class:`UnexpandedInameSet`\ s. + + """ + + # {{{ Get any current constraints, if they exist if kernel.loop_nest_constraints: if kernel.loop_nest_constraints.must_nest: must_nest_constraints_old = kernel.loop_nest_constraints.must_nest else: must_nest_constraints_old = set() + if kernel.loop_nest_constraints.must_not_nest: must_not_nest_constraints_old = \ kernel.loop_nest_constraints.must_not_nest else: must_not_nest_constraints_old = set() + if kernel.loop_nest_constraints.must_nest_graph: must_nest_graph_old = kernel.loop_nest_constraints.must_nest_graph else: @@ -388,20 +404,20 @@ def constrain_loop_nesting( must_not_nest_constraints_old = set() must_nest_graph_old = {} - # {{{ process must_nest + # }}} - # TODO remove (TEMPORARY HACK TO KEEP LEGACY CODE RUNNING) - # expand_must_priorities = set() + # {{{ Process must_nest if must_nest: - # {{{ parse must_nest, check for conflicts, combine with old constraints + # {{{ Parse must_nest, check for conflicts, combine with old constraints - # {{{ Parse must_nest; no 
complements allowed + # {{{ Parse must_nest (no complements allowed) must_nest_tuple = process_loop_nest_specification( must_nest, complement_sets_allowed=False) # }}} # {{{ Error if someone prioritizes concurrent iname + from loopy.kernel.data import ConcurrentTag for iname_set in must_nest_tuple: for iname in iname_set.inames: @@ -410,66 +426,69 @@ def constrain_loop_nesting( "iname %s tagged with ConcurrentTag, " "cannot use iname in must-nest constraint %s." % (iname, must_nest_tuple)) + # }}} - # {{{ must_nest_graph_new <- update_must_nest_graph(...) + # {{{ Update must_nest graph (and check for cycles) - # (checks for cycles) must_nest_graph_new = update_must_nest_graph( must_nest_graph_old, must_nest_tuple, kernel.all_inames()) # }}} - # {{{ make sure must_nest constraints don't violate must_not_nest - # this may not catch all problems (?) + # {{{ Make sure must_nest constraints don't violate must_not_nest + # (this may not catch all problems) check_must_not_nest_against_must_nest_graph( must_not_nest_constraints_old, must_nest_graph_new) # }}} - # {{{ check for conflicts with inames tagged 'vec' (must be innermost) + # {{{ Check for conflicts with inames tagged 'vec' (must be innermost) + from loopy.kernel.data import VectorizeTag for iname in kernel.all_inames(): if kernel.iname_tags_of_type(iname, VectorizeTag) and ( must_nest_graph_new.get(iname, set())): - # Iname cannot be a leaf, error + # Must-nest graph doesn't allow iname to be a leaf, error raise ValueError( - "Iname %s tagged as 'vec', but loop priorities " + "Iname %s tagged as 'vec', but loop nest constraints " "%s require that iname %s nest outside of inames %s. " "Vectorized inames must nest innermost; cannot " "impose loop nest specification." 
% (iname, must_nest, iname, must_nest_graph_new.get(iname, set()))) - # }}} - # TODO remove (TEMPORARY HACK TO KEEP LEGACY CODE RUNNING) - # expand_must_priorities = _expand_iname_sets_in_tuple( - # must_nest_tuple, kernel.all_inames()) + # }}} - # {{{ combine new must_nest constraints with old + # {{{ Add new must_nest constraints to existing must_nest constraints must_nest_constraints_new = must_nest_constraints_old | set( [must_nest_tuple, ]) # }}} # }}} else: - # {{{ no new must_nest constraints, keep the old ones + # {{{ No new must_nest constraints, just keep the old ones + must_nest_constraints_new = must_nest_constraints_old must_nest_graph_new = must_nest_graph_old + # }}} # }}} - # {{{ process must_not_nest + # {{{ Process must_not_nest if must_not_nest: - # {{{ parse must_not_nest, check for conflicts, combine with old constraints + # {{{ Parse must_not_nest, check for conflicts, combine with old constraints # {{{ Parse must_not_nest; complements allowed; max_tuple_size=2 + must_not_nest_tuple = process_loop_nest_specification( must_not_nest, max_tuple_size=2) + # }}} - # {{{ make sure must_not_nest constraints don't violate must_nest + # {{{ Make sure must_not_nest constraints don't violate must_nest + # (cycles are allowed in must_not_nest constraints) import itertools must_pairs = [] @@ -482,17 +501,20 @@ def constrain_loop_nesting( "must_not_nest constraints %s inconsistent with " "must_nest constraints %s." 
% (must_not_nest_tuple, must_nest_constraints_new)) + # }}} - # {{{ combine new must_not_nest constraints with old + # {{{ Add new must_not_nest constraints to exisitng must_not_nest constraints must_not_nest_constraints_new = must_not_nest_constraints_old | set([ must_not_nest_tuple, ]) # }}} # }}} else: - # {{{ no new must_not_nest constraints, keep the old ones + # {{{ No new must_not_nest constraints, just keep the old ones + must_not_nest_constraints_new = must_not_nest_constraints_old + # }}} # }}} @@ -503,11 +525,7 @@ def constrain_loop_nesting( must_nest_graph=must_nest_graph_new, ) - # TODO do something with old priorities??? - return kernel.copy( - # loop_priority=kernel.loop_priority.union(expand_must_priorities), - loop_nest_constraints=nest_constraints, - ) + return kernel.copy(loop_nest_constraints=nest_constraints) # }}} @@ -515,34 +533,35 @@ def constrain_loop_nesting( # {{{ update_must_nest_graph def update_must_nest_graph(must_nest_graph, must_nest, all_inames): - # Note: there should not be any complements in the must_nest tuples + # Note: there should *not* be any complements in the must_nest tuples + from copy import deepcopy new_graph = deepcopy(must_nest_graph) - # first, all inames must be a node in the graph: + # First, each iname must be a node in the graph for missing_iname in all_inames - new_graph.keys(): new_graph[missing_iname] = set() - # get (before, after) pairs: + # Expand must_nest into (before, after) pairs must_nest_expanded = _expand_iname_sets_in_tuple(must_nest, all_inames) - # update graph: + # Update must_nest_graph with new pairs for before, after in must_nest_expanded: new_graph[before].add(after) - # compute transitive closure: - from pytools.graph import compute_transitive_closure - # Note: compute_transitive_closure now allows cycles, will not error + # Compute transitive closure + from pytools.graph import compute_transitive_closure, contains_cycle new_graph_closure = compute_transitive_closure(new_graph) + # Note: 
compute_transitive_closure now allows cycles, will not error # Check for inconsistent must_nest constraints by checking for cycle: - from pytools.graph import contains_cycle if contains_cycle(new_graph_closure): raise ValueError( "update_must_nest_graph: Nest constraint cycle detected. " "must_nest constraints %s inconsistent with existing " "must_nest constraints %s." % (must_nest, must_nest_graph)) + return new_graph_closure # }}} @@ -551,15 +570,15 @@ def update_must_nest_graph(must_nest_graph, must_nest, all_inames): # {{{ _expand_iname_sets_in_tuple def _expand_iname_sets_in_tuple( - iname_sets_tuple, # (UnexpandedInameSet, Unex..., ...) - all_inames, + iname_sets_tuple, + iname_universe=None, ): - # First convert negatively defined iname sets to sets - positively_defined_iname_sets = [] - for iname_set in iname_sets_tuple: - positively_defined_iname_sets.append( - iname_set.get_inames_represented(all_inames)) + # First convert UnexpandedInameSets to sets. + # Note that must_nest constraints cannot be negatively defined. + positively_defined_iname_sets = [ + iname_set.get_inames_represented(iname_universe) + for iname_set in iname_sets_tuple] # Now expand all priority tuples into (before, after) pairs using # Cartesian product of all pairs of sets @@ -577,6 +596,7 @@ def _expand_iname_sets_in_tuple( raise ValueError( "Loop nesting %s contains cycle: %s. " % (iname_sets_tuple, prio_tuple)) + return loop_priority_pairs # }}} @@ -584,18 +604,33 @@ def _expand_iname_sets_in_tuple( # }}} -# {{{ checking constraints +# {{{ Checking constraints # {{{ check_must_nest def check_must_nest(all_loop_nests, must_nest, all_inames): - # in order to make sure must_nest is satisfied, we + """Determine whether must_nest constraint is satisfied by + all_loop_nests + + :arg all_loop_nests: A list of lists of inames, each representing + the nesting order of nested loops. 
+ + :arg must_nest: A tuple of :class:`UnexpandedInameSet`\ s describing + nestings that must appear in all_loop_nests. + + :returns: A :class:`bool` indicating whether the must nest constraints + are satisfied by the provided loop nesting. + + """ + + # In order to make sure must_nest is satisfied, we # need to expand all must_nest tiers - # TODO instead of expanding tiers into all pairs up front, + # FIXME instead of expanding tiers into all pairs up front, # create these pairs one at a time so that we can stop as soon as we fail - must_nest_expanded = _expand_iname_sets_in_tuple(must_nest, all_inames) + must_nest_expanded = _expand_iname_sets_in_tuple(must_nest) + # must_nest_expanded contains pairs for before, after in must_nest_expanded: found = False @@ -614,12 +649,28 @@ def check_must_nest(all_loop_nests, must_nest, all_inames): # {{{ check_must_not_nest def check_must_not_nest(all_loop_nests, must_not_nest): - # recall that must_not_nest may only contain two tiers + """Determine whether must_not_nest constraint is satisfied by + all_loop_nests + + :arg all_loop_nests: A list of lists of inames, each representing + the nesting order of nested loops. + + :arg must_not_nest: A two-tuple of :class:`UnexpandedInameSet`\ s + describing nestings that must not appear in all_loop_nests. + + :returns: A :class:`bool` indicating whether the must_not_nest constraints + are satisfied by the provided loop nesting. 
+ + """ + + # Note that must_not_nest may only contain two tiers for nesting in all_loop_nests: - # Go thru each pair in all_loop_nests + + # Go through each pair in all_loop_nests for i, iname_before in enumerate(nesting): for iname_after in nesting[i+1:]: + # Check whether it violates must not nest if (must_not_nest[0].contains(iname_before) and must_not_nest[1].contains(iname_after)): @@ -633,7 +684,20 @@ def check_must_not_nest(all_loop_nests, must_not_nest): # {{{ check_all_must_not_nests def check_all_must_not_nests(all_loop_nests, must_not_nests): - # recall that must_not_nest may only contain two tiers + """Determine whether all must_not_nest constraints are satisfied by + all_loop_nests + + :arg all_loop_nests: A list of lists of inames, each representing + the nesting order of nested loops. + + :arg must_not_nests: A set of two-tuples of :class:`UnexpandedInameSet`\ s + describing nestings that must not appear in all_loop_nests. + + :returns: A :class:`bool` indicating whether the must_not_nest constraints + are satisfied by the provided loop nesting. + + """ + for must_not_nest in must_not_nests: if not check_must_not_nest(all_loop_nests, must_not_nest): return False @@ -646,19 +710,37 @@ def check_all_must_not_nests(all_loop_nests, must_not_nests): def loop_nest_constraints_satisfied( all_loop_nests, - must_nest_constraints, - must_not_nest_constraints, - all_inames): + must_nest_constraints=None, + must_not_nest_constraints=None, + all_inames=None): + """Determine whether must_not_nest constraint is satisfied by + all_loop_nests + + :arg all_loop_nests: A set of lists of inames, each representing + the nesting order of loops. + + :arg must_nest_constraints: An iterable of tuples of + :class:`UnexpandedInameSet`\ s, each describing nestings that must + appear in all_loop_nests. + + :arg must_not_nest_constraints: An iterable of two-tuples of + :class:`UnexpandedInameSet`\ s, each describing nestings that must not + appear in all_loop_nests. 
- # check must-nest constraints + :returns: A :class:`bool` indicating whether the constraints + are satisfied by the provided loop nesting. + + """ + + # Check must-nest constraints if must_nest_constraints: for must_nest in must_nest_constraints: if not check_must_nest( all_loop_nests, must_nest, all_inames): return False - # check must-not-nest constraints - if must_not_nest_constraints is not None: + # Check must-not-nest constraints + if must_not_nest_constraints: for must_not_nest in must_not_nest_constraints: if not check_must_not_nest( all_loop_nests, must_not_nest): @@ -673,8 +755,17 @@ def loop_nest_constraints_satisfied( def check_must_not_nest_against_must_nest_graph( must_not_nest_constraints, must_nest_graph): - # make sure none of the must_nest constraints violate must_not_nest - # this may not catch all problems + """Ensure none of the must_not_nest constraints are violated by + nestings represented in the must_nest_graph + + :arg must_not_nest_constraints: A set of two-tuples of + :class:`UnexpandedInameSet`\ s describing nestings that must not appear + in loop nestings. + + :arg must_nest_graph: A :class:`dict` mapping each iname to other inames + that must be nested inside it. + + """ if must_not_nest_constraints and must_nest_graph: import itertools @@ -695,17 +786,19 @@ def check_must_not_nest_against_must_nest_graph( # {{{ get_iname_nestings -def get_iname_nestings(outline): +def get_iname_nestings(linearization): + """Return a list of iname tuples representing the deepest loop nestings + in a kernel linearization. 
+ """ from loopy.schedule import EnterLoop, LeaveLoop - # return a list of tuples representing deepest nestings nestings = [] current_tiers = [] already_exiting_loops = False - for outline_item in outline: - if isinstance(outline_item, EnterLoop): + for lin_item in linearization: + if isinstance(lin_item, EnterLoop): already_exiting_loops = False - current_tiers.append(outline_item.iname) - elif isinstance(outline_item, LeaveLoop): + current_tiers.append(lin_item.iname) + elif isinstance(lin_item, LeaveLoop): if not already_exiting_loops: nestings.append(tuple(current_tiers)) already_exiting_loops = True @@ -715,7 +808,7 @@ def get_iname_nestings(outline): # }}} -# {{{ get graph sources +# {{{ get_graph_sources def get_graph_sources(graph): sources = set(graph.keys()) diff --git a/test/test_nest_constraints.py b/test/test_nest_constraints.py index 40e52420a..4f00dbac8 100644 --- a/test/test_nest_constraints.py +++ b/test/test_nest_constraints.py @@ -153,17 +153,7 @@ def test_loop_constraint_string_parsing(): "Unrecognized character(s) [\'}\'] in nest string h}" ) in str(e) - # TODO these should pass - """ - try: - lp.constrain_loop_nesting(ref_knl, must_nest=("{h i j,,}", "k")) - assert False - except ValueError as e: - assert("Unrecognized character(s) [\'{\', \'}\'] in nest string {h i j,,}" - ) in str(e) - """ - - # valid syntax + # Valid syntax lp.constrain_loop_nesting(ref_knl, must_not_nest=("~{j,i}", "{j,i}")) lp.constrain_loop_nesting(ref_knl, must_not_nest=("{h}", "{j,i}")) lp.constrain_loop_nesting(ref_knl, must_not_nest=("h", "{j,i}")) @@ -172,7 +162,7 @@ def test_loop_constraint_string_parsing(): lp.constrain_loop_nesting(ref_knl, must_not_nest="~j,j") lp.constrain_loop_nesting(ref_knl, must_nest="k,h,j") - # handling spaces + # Handling spaces knl = lp.constrain_loop_nesting(ref_knl, must_nest=("k", "{h }", " { j , i } ")) assert list(knl.loop_nest_constraints.must_nest)[0][0].inames == set("k") assert 
list(knl.loop_nest_constraints.must_nest)[0][1].inames == set("h") @@ -205,7 +195,7 @@ def test_loop_nest_constraints_satisfied(): must_nest_constraints = [ process_loop_nest_specification( - nesting=("{g,h}", "~{g,h}"), + nesting=("{g,h}", "{i,j,k}"), complement_sets_allowed=True), ] must_not_nest_constraints = [ @@ -351,7 +341,6 @@ def test_incompatible_nest_constraints(): """, assumptions="n >= 1", ) - ref_knl = lp.add_and_infer_dtypes(ref_knl, {"a,a2,a3": np.dtype(np.float32)}) knl = ref_knl knl = lp.constrain_loop_nesting( knl, must_not_nest=("{k,i}", "~{k,i}")) From c7432a3d579783c105fecd876181b57350a4fa70 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Sat, 8 May 2021 23:22:52 -0500 Subject: [PATCH 375/460] fix some typos --- loopy/transform/iname.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/loopy/transform/iname.py b/loopy/transform/iname.py index a5f6176f3..036fdd2ef 100644 --- a/loopy/transform/iname.py +++ b/loopy/transform/iname.py @@ -129,8 +129,8 @@ def __init__(self, inames, complement=False): def contains(self, inames): if isinstance(inames, set): - return (not (iname_set & self.inames) if self.complement - else iname_set.issubset(self.inames)) + return (not (inames & self.inames) if self.complement + else inames.issubset(self.inames)) else: return (inames not in self.inames if self.complement else inames in self.inames) @@ -368,7 +368,7 @@ def _process_iname_set_str(iname_set_str): def constrain_loop_nesting( kernel, must_nest=None, must_not_nest=None): - """Add the provided constraints to the kernel. + r"""Add the provided constraints to the kernel. 
:arg must_nest: A tuple or comma-separated string representing an ordering of loop nesting tiers that must appear in the @@ -609,7 +609,7 @@ def _expand_iname_sets_in_tuple( # {{{ check_must_nest def check_must_nest(all_loop_nests, must_nest, all_inames): - """Determine whether must_nest constraint is satisfied by + r"""Determine whether must_nest constraint is satisfied by all_loop_nests :arg all_loop_nests: A list of lists of inames, each representing @@ -649,7 +649,7 @@ def check_must_nest(all_loop_nests, must_nest, all_inames): # {{{ check_must_not_nest def check_must_not_nest(all_loop_nests, must_not_nest): - """Determine whether must_not_nest constraint is satisfied by + r"""Determine whether must_not_nest constraint is satisfied by all_loop_nests :arg all_loop_nests: A list of lists of inames, each representing @@ -684,7 +684,7 @@ def check_must_not_nest(all_loop_nests, must_not_nest): # {{{ check_all_must_not_nests def check_all_must_not_nests(all_loop_nests, must_not_nests): - """Determine whether all must_not_nest constraints are satisfied by + r"""Determine whether all must_not_nest constraints are satisfied by all_loop_nests :arg all_loop_nests: A list of lists of inames, each representing @@ -713,7 +713,7 @@ def loop_nest_constraints_satisfied( must_nest_constraints=None, must_not_nest_constraints=None, all_inames=None): - """Determine whether must_not_nest constraint is satisfied by + r"""Determine whether must_not_nest constraint is satisfied by all_loop_nests :arg all_loop_nests: A set of lists of inames, each representing @@ -755,7 +755,7 @@ def loop_nest_constraints_satisfied( def check_must_not_nest_against_must_nest_graph( must_not_nest_constraints, must_nest_graph): - """Ensure none of the must_not_nest constraints are violated by + r"""Ensure none of the must_not_nest constraints are violated by nestings represented in the must_nest_graph :arg must_not_nest_constraints: A set of two-tuples of From e07d0ecf9e324e604c26adb6c277602fddb006a0 Mon 
Sep 17 00:00:00 2001 From: jdsteve2 Date: Sat, 8 May 2021 23:38:11 -0500 Subject: [PATCH 376/460] fix typo in comment --- loopy/transform/iname.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/loopy/transform/iname.py b/loopy/transform/iname.py index 036fdd2ef..a62ba5e1c 100644 --- a/loopy/transform/iname.py +++ b/loopy/transform/iname.py @@ -346,7 +346,7 @@ def _process_iname_set_str(iname_set_str): nesting_as_tuple = tuple( _process_iname_set_str(set_str) for set_str in nesting) - # Check max_inames_per_set + # Check max_tuple_size if max_tuple_size and len(nesting_as_tuple) > max_tuple_size: _raise_loop_nest_input_error( "Loop nest prioritization tuple %s exceeds max tuple size %d." From 502e155206fed7f4c9adff8aae7ac29e7f98d0ce Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Sun, 16 May 2021 19:52:03 -0500 Subject: [PATCH 377/460] add find_and_rename_dim() --- loopy/schedule/checker/utils.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/loopy/schedule/checker/utils.py b/loopy/schedule/checker/utils.py index 401fd477a..c02149790 100644 --- a/loopy/schedule/checker/utils.py +++ b/loopy/schedule/checker/utils.py @@ -107,6 +107,14 @@ def add_eq_isl_constraint_from_names(isl_map, var1, var2): {1: 0, var1: 1, var2: -1})) +def find_and_rename_dim(old_map, dim_types, old_name, new_name): + new_map = old_map.copy() + for dim_type in dim_types: + new_map = new_map.set_dim_name( + dim_type, new_map.find_dim_by_name(dim_type, old_name), new_name) + return new_map + + def append_mark_to_isl_map_var_names(old_isl_map, dim_type, mark): """Return an :class:`islpy.Map` with a mark appended to the specified dimension names. 
From 003eed17e4dab494ef5630eea83348fabe144dcc Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Sun, 16 May 2021 19:53:22 -0500 Subject: [PATCH 378/460] in map_domain, deal with situation where transform map doesn't include all the inames in the domain (in which case the missing inames should be unaffected --- loopy/transform/iname.py | 77 ++++++++++++++++++++++++++++++++-------- 1 file changed, 62 insertions(+), 15 deletions(-) diff --git a/loopy/transform/iname.py b/loopy/transform/iname.py index 29a27f074..3c851a0aa 100644 --- a/loopy/transform/iname.py +++ b/loopy/transform/iname.py @@ -67,6 +67,8 @@ .. autofunction:: add_inames_to_insn +.. autofunction:: map_domain + .. autofunction:: add_inames_for_unused_hw_axes """ @@ -1887,6 +1889,8 @@ def _find_aff_subst_from_map(iname, isl_map): raise LoopyError("no suitable equation for '%s' found" % iname) +# TODO swap dt and dim_type + def map_domain(kernel, isl_map, within=None, rename_after={}): # FIXME: Express _split_iname_backend in terms of this # Missing/deleted for now: @@ -1947,6 +1951,10 @@ def _do_not_transform_if_no_within_matches(): # }}} + from loopy.schedule.checker.utils import ( + add_and_name_isl_dims, + ) + def process_set(s): var_dict = s.get_var_dict() @@ -1962,6 +1970,11 @@ def process_set(s): "either involve all or none of the map domain " "inames." % s) + from loopy.schedule.checker.utils import ( + find_and_rename_dim, + add_eq_isl_constraint_from_names, + ) + # {{{ align dims of isl_map and s # FIXME: Make this less gross @@ -1970,6 +1983,40 @@ def process_set(s): map_with_s_domain = isl.Map.from_domain(s) + # {{{ deal with dims missing from transform map (isl_map) + + # If dims in s are missing from transform map, they need to be added + # so that intersect_domain doesn't remove them. + # Order doesn't matter here because dims will be aligned in the next step. 
+ dims_missing_from_transform_map = list( + set(s.get_var_names(dim_type.set)) - + set(isl_map.get_var_names(dim_type.in_))) + augmented_isl_map = add_and_name_isl_dims( + isl_map, dim_type.in_, dims_missing_from_transform_map) + + # We want these missing inames to map to themselves so that the transform + # has no effect on them. Unfortunatley isl will break if the + # names of the out dims aren't unique, so we will temporariliy rename them + # and then change the names back afterward. + + # FIXME: need better way to make sure proxy dim names are unique + dims_missing_from_transform_map_proxies = [ + d+"__prox" for d in dims_missing_from_transform_map] + assert not set(dims_missing_from_transform_map_proxies) & set( + augmented_isl_map.get_var_dict().keys()) + + augmented_isl_map = add_and_name_isl_dims( + augmented_isl_map, dim_type.out, dims_missing_from_transform_map_proxies) + + # Set proxy iname equal to real iname + for proxy_iname, real_iname in zip( + dims_missing_from_transform_map_proxies, + dims_missing_from_transform_map): + augmented_isl_map = add_eq_isl_constraint_from_names( + augmented_isl_map, proxy_iname, real_iname) + + # }}} + dim_types = [dim_type.param, dim_type.in_, dim_type.out] s_names = [ map_with_s_domain.get_dim_name(dt, i) @@ -1977,36 +2024,36 @@ def process_set(s): for i in range(map_with_s_domain.dim(dt)) ] map_names = [ - isl_map.get_dim_name(dt, i) + augmented_isl_map.get_dim_name(dt, i) for dt in dim_types - for i in range(isl_map.dim(dt)) + for i in range(augmented_isl_map.dim(dt)) ] + # (order doesn't matter in s_names/map_names, # _align_dim_type just converts these to sets # to determine which names are in both the obj and template, # not sure why this isn't just handled inside _align_dim_type) aligned_map = _align_dim_type( dim_type.param, - isl_map, map_with_s_domain, False, + augmented_isl_map, map_with_s_domain, False, map_names, s_names) aligned_map = _align_dim_type( dim_type.in_, aligned_map, map_with_s_domain, False, 
map_names, s_names) - # Old code - """ - aligned_map = _align_dim_type( - dim_type.param, - isl_map, map_with_s_domain, obj_bigger_ok=False, - obj_names=map_names, tgt_names=s_names) - aligned_map = _align_dim_type( - dim_type.in_, - isl_map, map_with_s_domain, obj_bigger_ok=False, - obj_names=map_names, tgt_names=s_names) - """ + # }}} - return aligned_map.intersect_domain(s).range() + new_s = aligned_map.intersect_domain(s).range() + + # Now rename the proxy dims back to their original names + for proxy_iname, real_iname in zip( + dims_missing_from_transform_map_proxies, + dims_missing_from_transform_map): + new_s = find_and_rename_dim( + new_s, [dim_type.set], proxy_iname, real_iname) + + return new_s # FIXME: Revive _project_out_only_if_all_instructions_in_within From 5138b16137d57fb29e80ac4cb31c1c964ccd2cb4 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Sun, 16 May 2021 19:53:55 -0500 Subject: [PATCH 379/460] test map_domain with situation where transform map doesn't include all the inames in the domain (in which case the missing inames should be unaffected) --- test/test_transform.py | 91 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 91 insertions(+) diff --git a/test/test_transform.py b/test/test_transform.py index 6954513c8..00e1896a4 100644 --- a/test/test_transform.py +++ b/test/test_transform.py @@ -645,6 +645,97 @@ def test_map_domain_vs_split_iname(): # }}} +# {{{ test_map_domain_with_transform_map_missing_dims + +def test_map_domain_with_transform_map_missing_dims(): + # Make sure map_domain works correctly when the mapping doesn't include + # all the dims in the domain. 
+ + # {{{ Make kernel + + knl = lp.make_kernel( + [ + "[nx,nt] -> {[x, y, z, t]: 0 <= x,y,z < nx and 0 <= t < nt}", + ], + """ + a[y,x,t,z] = b[y,x,t,z] {id=stmta} + """, + lang_version=(2018, 2), + ) + knl = lp.add_and_infer_dtypes(knl, {"b": np.float32}) + ref_knl = knl + + # }}} + + # {{{ Apply domain change mapping + + knl_map_dom = ref_knl # loop priority goes away, deps stay + + # Create map_domain mapping that only includes t and y + # (x and z should be unaffected) + import islpy as isl + transform_map = isl.BasicMap( + "[nx,nt] -> {[t, y] -> [t_outer, t_inner, y_new]: " + "0 <= t_inner < 32 and " + "32*t_outer + t_inner = t and " + "0 <= 32*t_outer + t_inner < nt and " + "y = y_new" + "}") + + # Call map_domain to transform kernel + knl_map_dom = lp.map_domain(knl_map_dom, transform_map) + + # Prioritize loops (prio should eventually be updated in map_domain?) + try: + # Use constrain_loop_nesting if it's available + desired_prio = "x, t_outer, t_inner, z, y_new" + knl_map_dom = lp.constrain_loop_nesting(knl_map_dom, desired_prio) + except AttributeError: + # For some reason, prioritize_loops can't handle the ordering above + # when linearizing knl_split_iname below + desired_prio = "z, y_new, x, t_outer, t_inner" + knl_map_dom = lp.prioritize_loops(knl_map_dom, desired_prio) + + # Get a linearization + proc_knl_map_dom = lp.preprocess_kernel(knl_map_dom) + lin_knl_map_dom = lp.get_one_linearized_kernel(proc_knl_map_dom) + + # }}} + + # {{{ Split iname and see if we get the same result + + knl_split_iname = ref_knl + knl_split_iname = lp.split_iname(knl_split_iname, "t", 32) + knl_split_iname = lp.rename_iname(knl_split_iname, "y", "y_new") + try: + # Use constrain_loop_nesting if it's available + knl_split_iname = lp.constrain_loop_nesting(knl_split_iname, desired_prio) + except AttributeError: + knl_split_iname = lp.prioritize_loops(knl_split_iname, desired_prio) + proc_knl_split_iname = lp.preprocess_kernel(knl_split_iname) + lin_knl_split_iname = 
lp.get_one_linearized_kernel(proc_knl_split_iname) + + from loopy.schedule.checker.utils import ( + ensure_dim_names_match_and_align, + ) + for d_map_domain, d_split_iname in zip( + knl_map_dom.domains, knl_split_iname.domains): + d_map_domain_aligned = ensure_dim_names_match_and_align( + d_map_domain, d_split_iname) + assert d_map_domain_aligned == d_split_iname + + for litem_map_domain, litem_split_iname in zip( + lin_knl_map_dom.linearization, lin_knl_split_iname.linearization): + assert litem_map_domain == litem_split_iname + + # Can't easily compare instructions because equivalent subscript + # expressions may have different orders + + # }}} + +# }}} + + def test_diamond_tiling(ctx_factory, interactive=False): ctx = ctx_factory() queue = cl.CommandQueue(ctx) From 87ca4fd8073d37f737f4889646f60ef675217adb Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Sun, 16 May 2021 20:21:55 -0500 Subject: [PATCH 380/460] when checking the overlap condition in map_domain, use the actual set/map dims that matter, which may be different than old_inames --- loopy/transform/iname.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/loopy/transform/iname.py b/loopy/transform/iname.py index b3b8fefb9..509b820eb 100644 --- a/loopy/transform/iname.py +++ b/loopy/transform/iname.py @@ -2247,7 +2247,8 @@ def process_set(s): def _apply_transform_map_to_depender(dep_map): # Check overlap condition - overlap = _check_overlap_condition_for_domain(dep_map.range(), old_inames) + overlap = _check_overlap_condition_for_domain( + dep_map.range(), set(dep_transform_map.get_var_names(dt.in_))) if not overlap: # Inames in s are not present in depender, don't change dep_map @@ -2266,13 +2267,11 @@ def _apply_transform_map_to_depender(dep_map): # Now we've renamed statement var, so fix it (assume statement dim is 0) return transformed_dep_map.set_dim_name(dt.out, 0, STATEMENT_VAR_NAME) - old_inames_marked = frozenset(old_iname+BEFORE_MARK for old_iname in old_inames) - def 
_apply_transform_map_to_dependee(dep_map): # Check overlap condition overlap = _check_overlap_condition_for_domain( - dep_map.domain(), old_inames_marked) + dep_map.domain(), set(dep_transform_map_marked.get_var_names(dt.in_))) if not overlap: # Inames in s are not present in dependee, don't change dep_map From fc9d92f0203ef7d3a162d23c21b8fcb3793a099b Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Sun, 16 May 2021 20:23:52 -0500 Subject: [PATCH 381/460] remove duplicate function def --- loopy/schedule/checker/utils.py | 8 -------- 1 file changed, 8 deletions(-) diff --git a/loopy/schedule/checker/utils.py b/loopy/schedule/checker/utils.py index 4eb59630f..a31227d7a 100644 --- a/loopy/schedule/checker/utils.py +++ b/loopy/schedule/checker/utils.py @@ -146,14 +146,6 @@ def append_mark_to_isl_map_var_names(old_isl_map, dim_type, mark): return new_map -def find_and_rename_dim(old_map, dim_types, old_name, new_name): - new_map = old_map.copy() - for dim_type in dim_types: - new_map = new_map.set_dim_name( - dim_type, new_map.find_dim_by_name(dim_type, old_name), new_name) - return new_map - - def append_mark_to_strings(strings, mark): return [s+mark for s in strings] From 46580b54f395ed9bdc1123b850265e62f8b970a5 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Sun, 16 May 2021 20:42:01 -0500 Subject: [PATCH 382/460] in map_domain, ignore statement var name in maps when performing iname overlap check --- loopy/transform/iname.py | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/loopy/transform/iname.py b/loopy/transform/iname.py index 509b820eb..565c71cf7 100644 --- a/loopy/transform/iname.py +++ b/loopy/transform/iname.py @@ -2077,12 +2077,25 @@ def map_domain(kernel, isl_map, within=None, rename_after={}): # }}} + from loopy.schedule.checker.schedule import ( + BEFORE_MARK, + STATEMENT_VAR_NAME, + ) + def _check_overlap_condition_for_domain(s, transform_map_in_names): + + names_to_ignore = set([STATEMENT_VAR_NAME, 
STATEMENT_VAR_NAME+BEFORE_MARK]) + transform_map_in_inames = transform_map_in_names - names_to_ignore + var_dict = s.get_var_dict() - overlap = transform_map_in_names & frozenset(var_dict) + overlap = transform_map_in_inames & frozenset(var_dict) - if overlap and len(overlap) != len(transform_map_in_names): + # If there is any overlap in the inames in the transform map and s + # (note that we're ignoring the statement var name, which may have been + # added to a transform map or s), all of the transform map inames must be in + # the overlap. + if overlap and len(overlap) != len(transform_map_in_inames): raise LoopyError("loop domain '%s' involves a part " "of the map domain inames. Domains must " "either involve all or none of the map domain " @@ -2198,10 +2211,6 @@ def process_set(s): insert_and_name_isl_dims, add_eq_isl_constraint_from_names, ) - from loopy.schedule.checker.schedule import ( - BEFORE_MARK, - STATEMENT_VAR_NAME, - ) dt = isl.dim_type # Create version of transform map with before marks From 25cea94de84508383a4b8642cfc7a255d020980f Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Sun, 16 May 2021 20:45:25 -0500 Subject: [PATCH 383/460] in map_domain, ignore statement var name in maps when performing iname overlap check --- loopy/transform/iname.py | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/loopy/transform/iname.py b/loopy/transform/iname.py index 509b820eb..565c71cf7 100644 --- a/loopy/transform/iname.py +++ b/loopy/transform/iname.py @@ -2077,12 +2077,25 @@ def map_domain(kernel, isl_map, within=None, rename_after={}): # }}} + from loopy.schedule.checker.schedule import ( + BEFORE_MARK, + STATEMENT_VAR_NAME, + ) + def _check_overlap_condition_for_domain(s, transform_map_in_names): + + names_to_ignore = set([STATEMENT_VAR_NAME, STATEMENT_VAR_NAME+BEFORE_MARK]) + transform_map_in_inames = transform_map_in_names - names_to_ignore + var_dict = s.get_var_dict() - overlap = transform_map_in_names & 
frozenset(var_dict) + overlap = transform_map_in_inames & frozenset(var_dict) - if overlap and len(overlap) != len(transform_map_in_names): + # If there is any overlap in the inames in the transform map and s + # (note that we're ignoring the statement var name, which may have been + # added to a transform map or s), all of the transform map inames must be in + # the overlap. + if overlap and len(overlap) != len(transform_map_in_inames): raise LoopyError("loop domain '%s' involves a part " "of the map domain inames. Domains must " "either involve all or none of the map domain " @@ -2198,10 +2211,6 @@ def process_set(s): insert_and_name_isl_dims, add_eq_isl_constraint_from_names, ) - from loopy.schedule.checker.schedule import ( - BEFORE_MARK, - STATEMENT_VAR_NAME, - ) dt = isl.dim_type # Create version of transform map with before marks From 2d0d346186035593297e41f8f7e3fa4c4fd72290 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Mon, 17 May 2021 02:25:19 -0500 Subject: [PATCH 384/460] add move_dim_to_index() --- loopy/schedule/checker/utils.py | 42 +++++++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) diff --git a/loopy/schedule/checker/utils.py b/loopy/schedule/checker/utils.py index a31227d7a..42009aba2 100644 --- a/loopy/schedule/checker/utils.py +++ b/loopy/schedule/checker/utils.py @@ -84,6 +84,48 @@ def reorder_dims_by_name( return new_set +def move_dim_to_index( + isl_map, dim_name, dim_type, destination_idx): + """Return an isl map with the specified dimension moved to + the specified index. + + :arg isl_map: A :class:`islpy.Map`. + + :arg dim_name: A :class:`str` specifying the name of the dimension + to be moved. + + :arg dim_type: A :class:`islpy.dim_type`, i.e., an :class:`int`, + specifying the type of dimension to be reordered. + + :arg destination_idx: A :class:`int` specifying the desired dimension + index of the dimention to be moved. 
+ + :returns: An :class:`islpy.Map` matching `isl_map` with the + specified dimension moved to the specified index. + + """ + + assert dim_type != dt.param + + layover_dim_type = dt.param + layover_dim_len = len(isl_map.get_var_names(layover_dim_type)) + + current_idx = isl_map.find_dim_by_name(dim_type, dim_name) + if current_idx == -1: + raise ValueError("Dimension name %s not found in dim type %s of %s" + % (dim_name, dim_type, isl_map)) + + if current_idx != destination_idx: + # First move to other dim because isl is stupid + new_map = isl_map.move_dims( + layover_dim_type, layover_dim_len, dim_type, current_idx, 1) + # Now move it where we actually want it + new_map = new_map.move_dims( + dim_type, destination_idx, layover_dim_type, layover_dim_len, 1) + + return new_map + + def remove_dim_by_name(isl_map, dim_type, dim_name): idx = isl_map.find_dim_by_name(dim_type, dim_name) if idx == -1: From 0a3e32e98c7c7d7c7808cd8b11bc5b1ce31fba98 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Mon, 17 May 2021 02:27:23 -0500 Subject: [PATCH 385/460] in map_domain() handle dependencies correctly when the transform map only covers a subset of the inames in the dependency (leave the other inames unchanged) --- loopy/transform/iname.py | 221 ++++++++++++++++++++++----------------- 1 file changed, 126 insertions(+), 95 deletions(-) diff --git a/loopy/transform/iname.py b/loopy/transform/iname.py index 565c71cf7..1c426fea1 100644 --- a/loopy/transform/iname.py +++ b/loopy/transform/iname.py @@ -2021,6 +2021,45 @@ def _find_aff_subst_from_map(iname, isl_map): raise LoopyError("no suitable equation for '%s' found" % iname) +def _apply_identity_for_missing_map_dims(mapping, desired_dims): + from loopy.schedule.checker.utils import ( + add_and_name_isl_dims, + add_eq_isl_constraint_from_names, + ) + + # If dims in s are missing from transform map, they need to be added + # so that, e.g, intersect_domain doesn't remove them. 
+ # (assume ordering will be handled afterward) + + # TODO remove sorted + missing_dims = sorted(list( + set(desired_dims) - set(mapping.get_var_names(dim_type.in_)))) + augmented_mapping = add_and_name_isl_dims( + mapping, dim_type.in_, missing_dims) + + # We want these missing inames to map to themselves so that the map + # has no effect on them. Unfortunatley isl will break if the + # names of the out dims aren't unique, so we will temporariliy rename them + # (and then plan to change the names back afterward). + + # FIXME: need better way to make sure proxy dim names are unique within map + missing_dims_proxies = [d+"__prox" for d in missing_dims] + assert not set(missing_dims_proxies) & set( + augmented_mapping.get_var_dict().keys()) + + augmented_mapping = add_and_name_isl_dims( + augmented_mapping, dim_type.out, missing_dims_proxies) + + proxy_name_pairs = list(zip(missing_dims, missing_dims_proxies)) + + # Set proxy iname equal to real iname with equality constraint + for real_iname, proxy_iname in proxy_name_pairs: + augmented_mapping = add_eq_isl_constraint_from_names( + augmented_mapping, proxy_iname, real_iname) + + return augmented_mapping, proxy_name_pairs + + # TODO swap dt and dim_type def map_domain(kernel, isl_map, within=None, rename_after={}): @@ -2105,6 +2144,7 @@ def _check_overlap_condition_for_domain(s, transform_map_in_names): from loopy.schedule.checker.utils import ( add_and_name_isl_dims, + find_and_rename_dim, ) def process_set(s): @@ -2114,53 +2154,32 @@ def process_set(s): # inames in s are not present in transform map, don't change s return s + # At this point, overlap condition check guarantees that the + # in-dims of the transform map are a subset of the dims we're + # about to change. 
+ from loopy.schedule.checker.utils import ( - find_and_rename_dim, add_eq_isl_constraint_from_names, ) # {{{ align dims of isl_map and s - # FIXME: Make this less gross - # FIXME: Make an exported/documented interface of this in islpy from islpy import _align_dim_type map_with_s_domain = isl.Map.from_domain(s) - # {{{ deal with dims missing from transform map (isl_map) - - # If dims in s are missing from transform map, they need to be added - # so that intersect_domain doesn't remove them. - # Order doesn't matter here because dims will be aligned in the next step. - dims_missing_from_transform_map = list( - set(s.get_var_names(dim_type.set)) - - set(isl_map.get_var_names(dim_type.in_))) - augmented_isl_map = add_and_name_isl_dims( - isl_map, dim_type.in_, dims_missing_from_transform_map) - - # We want these missing inames to map to themselves so that the transform - # has no effect on them. Unfortunatley isl will break if the - # names of the out dims aren't unique, so we will temporariliy rename them - # and then change the names back afterward. - - # FIXME: need better way to make sure proxy dim names are unique - dims_missing_from_transform_map_proxies = [ - d+"__prox" for d in dims_missing_from_transform_map] - assert not set(dims_missing_from_transform_map_proxies) & set( - augmented_isl_map.get_var_dict().keys()) - - augmented_isl_map = add_and_name_isl_dims( - augmented_isl_map, dim_type.out, dims_missing_from_transform_map_proxies) - - # Set proxy iname equal to real iname - for proxy_iname, real_iname in zip( - dims_missing_from_transform_map_proxies, - dims_missing_from_transform_map): - augmented_isl_map = add_eq_isl_constraint_from_names( - augmented_isl_map, proxy_iname, real_iname) - - # }}} + # If there are dims in s that are not mapped by isl_map, add them + # to the in/out space of isl_map so that they remain unchanged. 
+ # (temporary proxy dim names are needed in out space of transform + # map because isl won't allow any dim names to match, i.e., instead + # of just mapping {[unused_iname]->[unused_iname]}, we have to map + # {[unused_name]->[unused_name__prox] : unused_name__prox = unused_name}, + # and then rename unused_name__prox afterward.) + augmented_isl_map, proxy_name_pairs = _apply_identity_for_missing_map_dims( + isl_map, s.get_var_names(dim_type.set)) + # FIXME: Make this less gross + # FIXME: Make an exported/documented interface of this in islpy dim_types = [dim_type.param, dim_type.in_, dim_type.out] s_names = [ map_with_s_domain.get_dim_name(dt, i) @@ -2191,9 +2210,7 @@ def process_set(s): new_s = aligned_map.intersect_domain(s).range() # Now rename the proxy dims back to their original names - for proxy_iname, real_iname in zip( - dims_missing_from_transform_map_proxies, - dims_missing_from_transform_map): + for real_iname, proxy_iname in proxy_name_pairs: new_s = find_and_rename_dim( new_s, [dim_type.set], proxy_iname, real_iname) @@ -2208,97 +2225,111 @@ def process_set(s): # Prep transform map to be applied to dependency from loopy.transform.instruction import map_dependency_maps from loopy.schedule.checker.utils import ( - insert_and_name_isl_dims, - add_eq_isl_constraint_from_names, + append_mark_to_isl_map_var_names, + move_dim_to_index, ) dt = isl.dim_type # Create version of transform map with before marks - # (for aligning when applying map to domains of dependees) - from loopy.schedule.checker.utils import ( - append_mark_to_isl_map_var_names, - ) - dep_transform_map_marked = append_mark_to_isl_map_var_names( - isl_map, dt.in_, BEFORE_MARK) - - # Insert 'statement' dim into transform maps - # (mark the 'in' statement in BOTH cases) - - # NOTE: dims must all be named correctly for the alignment to work, but dim names - # must also be unique, so the output statement var name can't match the input - # statement var name, which means in order to have the 
map keep the statement - # dim unchanged, (map statement_var -> statement_var), we have to change its - # name and then change it back afterward. - - # (TODO: create a function that makes it easier to apply a transform map - # (tgt.apply_domain/tgt.apply_range) when the input dims of the transform map - # are a *subset* of the domain/range of the tgt, in which case the extra dims - # remain unchanged.) - - dep_transform_map_marked = insert_and_name_isl_dims( - dep_transform_map_marked, dt.in_, [STATEMENT_VAR_NAME+BEFORE_MARK], 0) - dep_transform_map_marked = insert_and_name_isl_dims( - dep_transform_map_marked, dt.out, [STATEMENT_VAR_NAME], 0) - # Add stmt = stmt' constraint - dep_transform_map_marked = add_eq_isl_constraint_from_names( - dep_transform_map_marked, STATEMENT_VAR_NAME, STATEMENT_VAR_NAME+BEFORE_MARK) - - # Temporarily rename stmt in 'out' dim for reason described above - temp_stmt_var = STATEMENT_VAR_NAME+"__" - dep_transform_map = insert_and_name_isl_dims( - isl_map, dt.in_, [STATEMENT_VAR_NAME], 0) - dep_transform_map = insert_and_name_isl_dims( - dep_transform_map, dt.out, [temp_stmt_var], 0) - # Add stmt = temp_stmt_var constraint - dep_transform_map = add_eq_isl_constraint_from_names( - dep_transform_map, STATEMENT_VAR_NAME, temp_stmt_var) + # (for aligning when applying map to dependee portion of deps) + isl_map_marked = append_mark_to_isl_map_var_names( + append_mark_to_isl_map_var_names(isl_map, dt.in_, BEFORE_MARK), + dt.out, BEFORE_MARK) def _apply_transform_map_to_depender(dep_map): + # (since 'out' dim of dep is unmarked, use unmarked transform map) # Check overlap condition overlap = _check_overlap_condition_for_domain( - dep_map.range(), set(dep_transform_map.get_var_names(dt.in_))) + dep_map.range(), set(isl_map.get_var_names(dt.in_))) if not overlap: # Inames in s are not present in depender, don't change dep_map return dep_map else: + # At this point, overlap condition check guarantees that the + # in-dims of the transform map are a 
subset of the dims we're + # about to change. + + # If there are any out-dims (depender dims) in dep_map that are not + # mapped by the transform map, add them to the in/out space of the + # transform map so that they remain unchanged. + # (temporary proxy dim names are needed in out space of transform + # map because isl won't allow any dim names to match, i.e., instead + # of just mapping {[unused_name]->[unused_name]}, we have to map + # {[unused_name]->[unused_name__prox] : unused_name__prox = unused_name}, + # and then rename unused_name__prox afterward.) + ( + augmented_trans_map, proxy_name_pairs + ) = _apply_identity_for_missing_map_dims( + isl_map, dep_map.get_var_names(dt.out)) # Align 'in_' dim of transform map with 'out' dim of dep - # (since 'out' dim of dep is unmarked, use unmarked dep_transform_map) from loopy.schedule.checker.utils import reorder_dims_by_name - dep_transform_map_aligned = reorder_dims_by_name( - dep_transform_map, dt.in_, dep_map.get_var_names(dt.out)) + augmented_trans_map_aligned = reorder_dims_by_name( + augmented_trans_map, dt.in_, dep_map.get_var_names(dt.out)) # Apply transform map to dep output dims - transformed_dep_map = dep_map.apply_range(dep_transform_map_aligned) + new_dep_map = dep_map.apply_range(augmented_trans_map_aligned) + + # Now rename the proxy dims back to their original names + for real_iname, proxy_iname in proxy_name_pairs: + new_dep_map = find_and_rename_dim( + new_dep_map, [dt.out], proxy_iname, real_iname) - # Now we've renamed statement var, so fix it (assume statement dim is 0) - return transformed_dep_map.set_dim_name(dt.out, 0, STATEMENT_VAR_NAME) + # Statement var may have moved, so put it back at the beginning + new_dep_map = move_dim_to_index( + new_dep_map, STATEMENT_VAR_NAME, dt.out, 0) + + return new_dep_map def _apply_transform_map_to_dependee(dep_map): + # (since 'in_' dim of dep is marked, use isl_map_marked) # Check overlap condition overlap = _check_overlap_condition_for_domain( - 
dep_map.domain(), set(dep_transform_map_marked.get_var_names(dt.in_))) + dep_map.domain(), set(isl_map_marked.get_var_names(dt.in_))) if not overlap: # Inames in s are not present in dependee, don't change dep_map return dep_map else: + # At this point, overlap condition check guarantees that the + # in-dims of the transform map are a subset of the dims we're + # about to change. + + # If there are any in-dims (dependee dims) in dep_map that are not + # mapped by the transform map, add them to the in/out space of the + # transform map so that they remain unchanged. + # (temporary proxy dim names are needed in out space of transform + # map because isl won't allow any dim names to match, i.e., instead + # of just mapping {[unused_name]->[unused_name]}, we have to map + # {[unused_name]->[unused_name__prox] : unused_name__prox = unused_name}, + # and then rename unused_name__prox afterward.) + ( + augmented_trans_map_marked, proxy_name_pairs + ) = _apply_identity_for_missing_map_dims( + isl_map_marked, dep_map.get_var_names(dt.in_)) # Align 'in_' dim of transform map with 'in_' dim of dep - # (since 'in_' dim of dep is marked, use dep_transform_map_marked) from loopy.schedule.checker.utils import reorder_dims_by_name - dep_transform_map_aligned = reorder_dims_by_name( - dep_transform_map_marked, dt.in_, dep_map.get_var_names(dt.in_)) + augmented_trans_map_aligned = reorder_dims_by_name( + augmented_trans_map_marked, dt.in_, + dep_map.get_var_names(dt.in_)) + + # Apply transform map to dep input dims + new_dep_map = dep_map.apply_domain(augmented_trans_map_aligned) + + # Now rename the proxy dims back to their original names + for real_iname, proxy_iname in proxy_name_pairs: + new_dep_map = find_and_rename_dim( + new_dep_map, [dt.in_], proxy_iname, real_iname) - # Apply transform map to dep input dims (and re-insert BEFORE_MARK) - transformed_dep_map = dep_map.apply_domain(dep_transform_map_aligned) + # Statement var may have moved, so put it back at the beginning + 
new_dep_map = move_dim_to_index( + new_dep_map, STATEMENT_VAR_NAME+BEFORE_MARK, dt.in_, 0) - # Now re-add the before marks - return append_mark_to_isl_map_var_names( - transformed_dep_map, dt.in_, BEFORE_MARK) + return new_dep_map # TODO figure out proper way to create false match condition false_id_match = "not id:*" From 71881c8cdcdc9066f012d2207a149e43916e6ba8 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Mon, 17 May 2021 02:28:10 -0500 Subject: [PATCH 386/460] test to make sure map_domain handles dependencies correctly when the transform map only covers a subset of the inames in the dependency (leave the other inames unchanged) --- test/test_linearization_checker.py | 83 ++++++++++++++++++++++++++++++ 1 file changed, 83 insertions(+) diff --git a/test/test_linearization_checker.py b/test/test_linearization_checker.py index 321f04da5..c9a26b614 100644 --- a/test/test_linearization_checker.py +++ b/test/test_linearization_checker.py @@ -2122,6 +2122,89 @@ def test_map_domain_with_only_partial_dep_pair_affected(): # }}} +# {{{ test_map_domain_with_inames_missing_in_transform_map + +def test_map_domain_with_inames_missing_in_transform_map(): + + # Make sure map_domain updates deps correctly when the mapping doesn't + # include all the dims in the domain. 
+ + # {{{ Make kernel + + knl = lp.make_kernel( + "[nx,nt] -> {[x, y, z, t]: 0 <= x,y,z < nx and 0 <= t < nt}", + """ + a[y,x,t,z] = b[y,x,t,z] {id=stmta} + """, + lang_version=(2018, 2), + ) + knl = lp.add_and_infer_dtypes(knl, {"b": np.float32}) + + # }}} + + # {{{ Create dependency + + dep = _isl_map_with_marked_dims( + "[nx, nt] -> {{" + "[{0}' = 0, x', y', z', t'] -> [{0} = 0, x, y, z, t] : " + "0 <= x,y,z,x',y',z' < nx and 0 <= t,t' < nt and " + "t' < t and x' < x and y' < y and z' < z" + "}}".format(STATEMENT_VAR_NAME)) + + knl = lp.add_dependency_v2(knl, "stmta", "stmta", dep) + + # }}} + + # {{{ Apply domain change mapping + + # Create map_domain mapping that only includes t and y + # (x and z should be unaffected) + transform_map = isl.BasicMap( + "[nx,nt] -> {[t, y] -> [t_outer, t_inner, y_new]: " + "0 <= t_inner < 32 and " + "32*t_outer + t_inner = t and " + "0 <= 32*t_outer + t_inner < nt and " + "y = y_new" + "}") + + # Call map_domain to transform kernel + knl = lp.map_domain(knl, transform_map) + + # }}} + + # {{{ Create expected dependency after transformation + + dep_exp = _isl_map_with_marked_dims( + "[nx, nt] -> {{" + "[{0}' = 0, x', y_new', z', t_outer', t_inner'] -> " + "[{0} = 0, x, y_new, z, t_outer, t_inner] : " + "0 <= x,z,x',z' < nx " # old bounds + "and 0 <= t_inner,t_inner' < 32 and 0 <= y_new,y_new' < nx " # new bounds + "and 0 <= 32*t_outer + t_inner < nt " # new bounds + "and 0 <= 32*t_outer' + t_inner' < nt " # new bounds + "and x' < x and z' < z " # old constraints + "and y_new' < y_new " # new constraint + "and 32*t_outer' + t_inner' < 32*t_outer + t_inner" # new constraint + "}}".format(STATEMENT_VAR_NAME)) + + # }}} + + # {{{ Make sure deps are correct and satisfied + + # Compare deps and make sure they are satisfied + unsatisfied_deps = _compare_dependencies( + knl, + {"stmta": {"stmta": [dep_exp, ]}}, + return_unsatisfied=True) + + assert not unsatisfied_deps + + # }}} + + +# }}} + + # {{{ 
test_map_domain_with_stencil_dependencies def test_map_domain_with_stencil_dependencies(): From fbc8952e970aca6822c0b06935d95acc1544d494 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Mon, 17 May 2021 02:43:16 -0500 Subject: [PATCH 387/460] be consistent with dt vs dim_type naming: set dt = isl.dim_type, and use 'dim_type' to refer to a specific dim type --- loopy/transform/iname.py | 117 ++++++++++++++++++--------------------- 1 file changed, 53 insertions(+), 64 deletions(-) diff --git a/loopy/transform/iname.py b/loopy/transform/iname.py index 1c426fea1..c2bf6f287 100644 --- a/loopy/transform/iname.py +++ b/loopy/transform/iname.py @@ -22,7 +22,7 @@ import islpy as isl -from islpy import dim_type +from islpy import dim_type as dt from loopy.symbolic import ( RuleAwareIdentityMapper, RuleAwareSubstitutionMapper, @@ -265,7 +265,6 @@ def _split_iname_backend(kernel, iname_to_split, convert_map_to_set, remove_dim_by_name, ) - dt = isl.dim_type def _split_iname_in_depender(dep): @@ -513,15 +512,15 @@ def make_new_loop_index(inner, outer): if split_iname not in var_dict: continue - dt, idx = var_dict[split_iname] - assert dt == dim_type.set + dim_type, idx = var_dict[split_iname] + assert dim_type == dt.set aff_zero = isl.Aff.zero_on_domain(dom.space) - aff_split_iname = aff_zero.set_coefficient_val(dim_type.in_, idx, 1) + aff_split_iname = aff_zero.set_coefficient_val(dt.in_, idx, 1) aligned_size = isl.align_spaces(size, aff_zero) box_dom = ( dom - .eliminate(dt, idx, 1) + .eliminate(dim_type, idx, 1) & aff_zero.le_set(aff_split_iname) & aff_split_iname.lt_set(aligned_size) ) @@ -624,9 +623,9 @@ def join_inames(kernel, inames, new_iname=None, tag=None, within=None): "join's leaf domain" % iname) new_domain = domch.domain - new_dim_idx = new_domain.dim(dim_type.set) - new_domain = new_domain.add_dims(dim_type.set, 1) - new_domain = new_domain.set_dim_name(dim_type.set, new_dim_idx, new_iname) + new_dim_idx = new_domain.dim(dt.set) + new_domain = 
new_domain.add_dims(dt.set, 1) + new_domain = new_domain.set_dim_name(dt.set, new_dim_idx, new_iname) joint_aff = zero = isl.Aff.zero_on_domain(new_domain.space) subst_dict = {} @@ -978,7 +977,6 @@ def duplicate_inames(kernel, inames, within, new_inames=None, suffix=None, from loopy.transform.instruction import map_dependency_maps from loopy.schedule.checker.schedule import BEFORE_MARK - dt = isl.dim_type old_iname_p = old_iname+BEFORE_MARK new_iname_p = new_iname+BEFORE_MARK @@ -1221,21 +1219,21 @@ def rename_iname(kernel, old_iname, new_iname, existing_ok=False, within=None): _, old_idx = var_dict[old_iname] _, new_idx = var_dict[new_iname] - par_idx = dom.dim(dim_type.param) + par_idx = dom.dim(dt.param) dom_old = dom.move_dims( - dim_type.param, par_idx, dim_type.set, old_idx, 1) + dt.param, par_idx, dt.set, old_idx, 1) dom_old = dom_old.move_dims( - dim_type.set, dom_old.dim(dim_type.set), dim_type.param, par_idx, 1) + dt.set, dom_old.dim(dt.set), dt.param, par_idx, 1) dom_old = dom_old.project_out( - dim_type.set, new_idx if new_idx < old_idx else new_idx - 1, 1) + dt.set, new_idx if new_idx < old_idx else new_idx - 1, 1) - par_idx = dom.dim(dim_type.param) + par_idx = dom.dim(dt.param) dom_new = dom.move_dims( - dim_type.param, par_idx, dim_type.set, new_idx, 1) + dt.param, par_idx, dt.set, new_idx, 1) dom_new = dom_new.move_dims( - dim_type.set, dom_new.dim(dim_type.set), dim_type.param, par_idx, 1) + dt.set, dom_new.dim(dt.set), dt.param, par_idx, 1) dom_new = dom_new.project_out( - dim_type.set, old_idx if old_idx < new_idx else old_idx - 1, 1) + dt.set, old_idx if old_idx < new_idx else old_idx - 1, 1) if not (dom_old <= dom_new and dom_new <= dom_old): raise LoopyError( @@ -1303,11 +1301,11 @@ def remove_vars_from_set(s, remove_vars): new_s = deepcopy(s) for var in remove_vars: try: - dt, idx = s.get_var_dict()[var] + dim_type, idx = s.get_var_dict()[var] except KeyError: continue else: - new_s = new_s.project_out(dt, idx, 1) + new_s = 
new_s.project_out(dim_type, idx, 1) return new_s @@ -1663,10 +1661,10 @@ def parse_equation(eqn): # add inames to domain with correct dim_types dom_new_inames = list(dom_new_inames) for iname in dom_new_inames: - dt = new_iname_dim_types[iname] - iname_idx = dom.dim(dt) - dom = dom.add_dims(dt, 1) - dom = dom.set_dim_name(dt, iname_idx, iname) + dim_type = new_iname_dim_types[iname] + iname_idx = dom.dim(dim_type) + dom = dom.add_dims(dim_type, 1) + dom = dom.set_dim_name(dim_type, iname_idx, iname) # add equations from loopy.symbolic import aff_from_expr @@ -1677,8 +1675,8 @@ def parse_equation(eqn): # project out old inames for iname in dom_old_inames: - dt, idx = dom.get_var_dict()[iname] - dom = dom.project_out(dt, idx, 1) + dim_type, idx = dom.get_var_dict()[iname] + dom = dom.project_out(dim_type, idx, 1) new_domains.append(dom) @@ -1981,32 +1979,32 @@ def _find_aff_subst_from_map(iname, isl_map): if not isinstance(isl_map, isl.BasicMap): raise RuntimeError("isl_map must be a BasicMap") - dt, dim_idx = isl_map.get_var_dict()[iname] + dim_type, dim_idx = isl_map.get_var_dict()[iname] - assert dt == dim_type.in_ + assert dim_type == dt.in_ # Force isl to solve for only this iname on its side of the map, by # projecting out all other "in" variables. - isl_map = isl_map.project_out(dt, dim_idx+1, isl_map.dim(dt)-(dim_idx+1)) - isl_map = isl_map.project_out(dt, 0, dim_idx) + isl_map = isl_map.project_out(dim_type, dim_idx+1, isl_map.dim(dim_type)-(dim_idx+1)) + isl_map = isl_map.project_out(dim_type, 0, dim_idx) dim_idx = 0 # Convert map to set to avoid "domain of affine expression should be a set". # The old "in" variable will be the last of the out_dims. 
- new_dim_idx = isl_map.dim(dim_type.out) + new_dim_idx = isl_map.dim(dt.out) isl_map = isl_map.move_dims( - dim_type.out, isl_map.dim(dim_type.out), - dt, dim_idx, 1) + dt.out, isl_map.dim(dt.out), + dim_type, dim_idx, 1) isl_map = isl_map.range() # now a set - dt = dim_type.set + dim_type = dt.set dim_idx = new_dim_idx del new_dim_idx for cns in isl_map.get_constraints(): - if cns.is_equality() and cns.involves_dims(dt, dim_idx, 1): - coeff = cns.get_coefficient_val(dt, dim_idx) - cns_zeroed = cns.set_coefficient_val(dt, dim_idx, 0) - if cns_zeroed.involves_dims(dt, dim_idx, 1): + if cns.is_equality() and cns.involves_dims(dim_type, dim_idx, 1): + coeff = cns.get_coefficient_val(dim_type, dim_idx) + cns_zeroed = cns.set_coefficient_val(dim_type, dim_idx, 0) + if cns_zeroed.involves_dims(dim_type, dim_idx, 1): # not suitable, constraint still involves dim, perhaps in a div continue @@ -2031,11 +2029,10 @@ def _apply_identity_for_missing_map_dims(mapping, desired_dims): # so that, e.g, intersect_domain doesn't remove them. # (assume ordering will be handled afterward) - # TODO remove sorted - missing_dims = sorted(list( - set(desired_dims) - set(mapping.get_var_names(dim_type.in_)))) + missing_dims = list( + set(desired_dims) - set(mapping.get_var_names(dt.in_))) augmented_mapping = add_and_name_isl_dims( - mapping, dim_type.in_, missing_dims) + mapping, dt.in_, missing_dims) # We want these missing inames to map to themselves so that the map # has no effect on them. 
Unfortunatley isl will break if the @@ -2048,7 +2045,7 @@ def _apply_identity_for_missing_map_dims(mapping, desired_dims): augmented_mapping.get_var_dict().keys()) augmented_mapping = add_and_name_isl_dims( - augmented_mapping, dim_type.out, missing_dims_proxies) + augmented_mapping, dt.out, missing_dims_proxies) proxy_name_pairs = list(zip(missing_dims, missing_dims_proxies)) @@ -2060,8 +2057,6 @@ def _apply_identity_for_missing_map_dims(mapping, desired_dims): return augmented_mapping, proxy_name_pairs -# TODO swap dt and dim_type - def map_domain(kernel, isl_map, within=None, rename_after={}): # FIXME: Express _split_iname_backend in terms of this # Missing/deleted for now: @@ -2094,8 +2089,8 @@ def map_domain(kernel, isl_map, within=None, rename_after={}): if not isl_map.is_bijective(): raise LoopyError("isl_map must be bijective") - new_inames = frozenset(isl_map.get_var_dict(dim_type.out)) - old_inames = frozenset(isl_map.get_var_dict(dim_type.in_)) + new_inames = frozenset(isl_map.get_var_dict(dt.out)) + old_inames = frozenset(isl_map.get_var_dict(dt.in_)) # {{{ solve for representation of old inames in terms of new @@ -2143,7 +2138,6 @@ def _check_overlap_condition_for_domain(s, transform_map_in_names): return overlap from loopy.schedule.checker.utils import ( - add_and_name_isl_dims, find_and_rename_dim, ) @@ -2158,10 +2152,6 @@ def process_set(s): # in-dims of the transform map are a subset of the dims we're # about to change. - from loopy.schedule.checker.utils import ( - add_eq_isl_constraint_from_names, - ) - # {{{ align dims of isl_map and s from islpy import _align_dim_type @@ -2176,20 +2166,20 @@ def process_set(s): # {[unused_name]->[unused_name__prox] : unused_name__prox = unused_name}, # and then rename unused_name__prox afterward.) 
augmented_isl_map, proxy_name_pairs = _apply_identity_for_missing_map_dims( - isl_map, s.get_var_names(dim_type.set)) + isl_map, s.get_var_names(dt.set)) # FIXME: Make this less gross # FIXME: Make an exported/documented interface of this in islpy - dim_types = [dim_type.param, dim_type.in_, dim_type.out] + dim_types = [dt.param, dt.in_, dt.out] s_names = [ - map_with_s_domain.get_dim_name(dt, i) - for dt in dim_types - for i in range(map_with_s_domain.dim(dt)) + map_with_s_domain.get_dim_name(dim_type, i) + for dim_type in dim_types + for i in range(map_with_s_domain.dim(dim_type)) ] map_names = [ - augmented_isl_map.get_dim_name(dt, i) - for dt in dim_types - for i in range(augmented_isl_map.dim(dt)) + augmented_isl_map.get_dim_name(dim_type, i) + for dim_type in dim_types + for i in range(augmented_isl_map.dim(dim_type)) ] # (order doesn't matter in s_names/map_names, @@ -2197,11 +2187,11 @@ def process_set(s): # to determine which names are in both the obj and template, # not sure why this isn't just handled inside _align_dim_type) aligned_map = _align_dim_type( - dim_type.param, + dt.param, augmented_isl_map, map_with_s_domain, False, map_names, s_names) aligned_map = _align_dim_type( - dim_type.in_, + dt.in_, aligned_map, map_with_s_domain, False, map_names, s_names) @@ -2212,7 +2202,7 @@ def process_set(s): # Now rename the proxy dims back to their original names for real_iname, proxy_iname in proxy_name_pairs: new_s = find_and_rename_dim( - new_s, [dim_type.set], proxy_iname, real_iname) + new_s, [dt.set], proxy_iname, real_iname) return new_s @@ -2228,7 +2218,6 @@ def process_set(s): append_mark_to_isl_map_var_names, move_dim_to_index, ) - dt = isl.dim_type # Create version of transform map with before marks # (for aligning when applying map to dependee portion of deps) From d83a87d7a0d204dad7a86f179f9ffc34ff030a98 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Mon, 17 May 2021 02:46:51 -0500 Subject: [PATCH 388/460] fix flake8 issue --- 
loopy/transform/iname.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/loopy/transform/iname.py b/loopy/transform/iname.py index c2bf6f287..c13b38e41 100644 --- a/loopy/transform/iname.py +++ b/loopy/transform/iname.py @@ -1985,7 +1985,8 @@ def _find_aff_subst_from_map(iname, isl_map): # Force isl to solve for only this iname on its side of the map, by # projecting out all other "in" variables. - isl_map = isl_map.project_out(dim_type, dim_idx+1, isl_map.dim(dim_type)-(dim_idx+1)) + isl_map = isl_map.project_out( + dim_type, dim_idx+1, isl_map.dim(dim_type)-(dim_idx+1)) isl_map = isl_map.project_out(dim_type, 0, dim_idx) dim_idx = 0 From 48b4dfbf26c14a06ae0ea328e1afff274e57cc03 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Sun, 23 May 2021 17:04:24 -0500 Subject: [PATCH 389/460] make helper function for dep creation that adds marks and inserts statement var dims (make_dep_map) --- loopy/schedule/checker/utils.py | 45 +++++++++++++++++++++++++++++++++ 1 file changed, 45 insertions(+) diff --git a/loopy/schedule/checker/utils.py b/loopy/schedule/checker/utils.py index 401fd477a..86bf935a4 100644 --- a/loopy/schedule/checker/utils.py +++ b/loopy/schedule/checker/utils.py @@ -136,6 +136,51 @@ def append_mark_to_strings(strings, mark): return [s+mark for s in strings] +# {{{ make_dep_map + +def make_dep_map(s, self_dep=False): + + # TODO put this function in the right place + + from loopy.schedule.checker.schedule import ( + BEFORE_MARK, + STATEMENT_VAR_NAME, + ) + + map_init = isl.Map(s) + + # TODO something smarter than this assert + for dim_name in map_init.get_var_names(dt.in_): + assert BEFORE_MARK not in dim_name + + # append BEFORE_MARK to in-vars + map_marked = append_mark_to_isl_map_var_names( + map_init, dt.in_, BEFORE_MARK) + + # insert statement dims: + map_with_stmts = insert_and_name_isl_dims( + map_marked, dt.in_, [STATEMENT_VAR_NAME+BEFORE_MARK], 0) + map_with_stmts = insert_and_name_isl_dims( + map_with_stmts, dt.out, 
[STATEMENT_VAR_NAME], 0) + + # assign values 0 or 1 to statement dims + sid_after = 0 if self_dep else 1 + + map_with_stmts = map_with_stmts.add_constraint( + isl.Constraint.eq_from_names( + map_with_stmts.space, + {1: 0, STATEMENT_VAR_NAME+BEFORE_MARK: -1})) + + map_with_stmts = map_with_stmts.add_constraint( + isl.Constraint.eq_from_names( + map_with_stmts.space, + {1: sid_after, STATEMENT_VAR_NAME: -1})) + + return map_with_stmts + +# }}} + + def sorted_union_of_names_in_isl_sets( isl_sets, set_dim=dt.set): From 36793bd86ae5e89e9f7c751e0745987bd076dec9 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Sun, 23 May 2021 17:05:05 -0500 Subject: [PATCH 390/460] use make_dep_map() instead of _isl_map_with_marked_dims (still need to replace more) --- test/test_linearization_checker.py | 33 ++++++++++++++++-------------- 1 file changed, 18 insertions(+), 15 deletions(-) diff --git a/test/test_linearization_checker.py b/test/test_linearization_checker.py index c5f3692ec..fab930675 100644 --- a/test/test_linearization_checker.py +++ b/test/test_linearization_checker.py @@ -45,6 +45,7 @@ ) from loopy.schedule.checker.utils import ( ensure_dim_names_match_and_align, + make_dep_map, ) logger = logging.getLogger(__name__) @@ -1360,14 +1361,14 @@ def test_add_dependency_v2(): assert not stmt.dependencies # Add a dependency to stmt_b - dep_b_on_a = _isl_map_with_marked_dims( - "[pi] -> {{ [{0}'=0, i'] -> [{0}=1, i] : i > i' " - "and {1} and {2} and {3} }}".format( - STATEMENT_VAR_NAME, + dep_b_on_a = make_dep_map( + "[pi] -> {{ [i'] -> [i] : i > i' " + "and {0} and {1} and {2} }}".format( i_range_str, i_range_str_p, assumptions_str, - )) + ), + self_dep=False) knl = lp.add_dependency_v2(knl, "stmt_b", "stmt_a", dep_b_on_a) @@ -1380,14 +1381,14 @@ def test_add_dependency_v2(): assert not stmt.dependencies # Add a second dependency to stmt_b - dep_b_on_a_2 = _isl_map_with_marked_dims( - "[pi] -> {{ [{0}'=0, i'] -> [{0}=1, i] : i = i' " - "and {1} and {2} and {3} }}".format( - 
STATEMENT_VAR_NAME, + dep_b_on_a_2 = make_dep_map( + "[pi] -> {{ [i'] -> [i] : i = i' " + "and {0} and {1} and {2} }}".format( i_range_str, i_range_str_p, assumptions_str, - )) + ), + self_dep=False) knl = lp.add_dependency_v2(knl, "stmt_b", "stmt_a", dep_b_on_a_2) @@ -1400,6 +1401,7 @@ def test_add_dependency_v2(): assert not stmt.dependencies # Add dependencies to stmt_c + # TODO use make_dep_map instead of _isl_map_with_marked_dims where possible dep_c_on_a = _isl_map_with_marked_dims( "[pi] -> {{ [{0}'=0, i'] -> [{0}=1, i] : i >= i' " @@ -1460,15 +1462,16 @@ def test_new_dependencies_finite_diff(): # Define dependency xt_range_str = "0 <= x < nx and 0 <= t < nt" xt_range_str_p = "0 <= x' < nx and 0 <= t' < nt" - dep = _isl_map_with_marked_dims( - "[nx,nt] -> {{ [{0}'=0, x', t'] -> [{0}=0, x, t] : " + dep = make_dep_map( + "[nx,nt] -> {{ [x', t'] -> [x, t] : " "((x = x' and t = t'+2) or " " (x'-1 <= x <= x'+1 and t = t' + 1)) and " - "{1} and {2} }}".format( - STATEMENT_VAR_NAME, + "{0} and {1} }}".format( xt_range_str, xt_range_str_p, - )) + ), + self_dep=True) + knl = lp.add_dependency_v2(knl, "stmt", "stmt", dep) ref_knl = knl From ec3185d3edd08d7cac12463a0479bd092ba80da7 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Wed, 26 May 2021 20:32:27 -0500 Subject: [PATCH 391/460] allow dep checking to work if lin_items is not provided --- loopy/schedule/checker/__init__.py | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/loopy/schedule/checker/__init__.py b/loopy/schedule/checker/__init__.py index a343a70c9..99e3ffb90 100644 --- a/loopy/schedule/checker/__init__.py +++ b/loopy/schedule/checker/__init__.py @@ -142,7 +142,7 @@ def get_pairwise_statement_orderings( def find_unsatisfied_dependencies( knl, - lin_items, + lin_items=None, ): """For each statement (:class:`loopy.InstructionBase`) found in a preprocessed kernel, determine which dependencies, if any, have been @@ -157,7 +157,8 @@ def find_unsatisfied_dependencies( (to be 
renamed to `loopy.schedule.LinearizationItem`) containing all linearization items in `knl.linearization`. To allow usage of this routine during linearization, a truncated (i.e. partial) - linearization may be passed through this argument. + linearization may be passed through this argument. If not provided, + `knl.linearization` will be used. :returns: A list of unsatisfied dependencies, each described using a :class:`collections.namedtuple` containing the following: @@ -185,13 +186,17 @@ def find_unsatisfied_dependencies( """ - # {{{ make sure kernel has been preprocessed + # {{{ Handle lin_items=None and make sure kernel has been preprocessed - # Note: kernels must always be preprocessed before scheduling from loopy.kernel import KernelState - assert knl.state in [ - KernelState.PREPROCESSED, - KernelState.LINEARIZED] + if lin_items is None: + assert knl.state == KernelState.LINEARIZED + lin_items = knl.linearization + else: + # Note: kernels must always be preprocessed before scheduling + assert knl.state in [ + KernelState.PREPROCESSED, + KernelState.LINEARIZED] # }}} From f17f2825064e06f01944fce4f55a66ab94cc1bb9 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Wed, 26 May 2021 20:33:03 -0500 Subject: [PATCH 392/460] make sure dep checking works even when linearization items are not provided separately from kernel --- test/test_linearization_checker.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/test/test_linearization_checker.py b/test/test_linearization_checker.py index fab930675..0bddbc973 100644 --- a/test/test_linearization_checker.py +++ b/test/test_linearization_checker.py @@ -1444,6 +1444,11 @@ def test_add_dependency_v2(): assert not unsatisfied_deps + # Make sure dep checking also works with just linearized kernel + unsatisfied_deps = lp.find_unsatisfied_dependencies(lin_knl) + + assert not unsatisfied_deps + # }}} @@ -1490,6 +1495,11 @@ def test_new_dependencies_finite_diff(): print(lp.generate_code_v2(lin_knl).device_code()) assert 
not unsatisfied_deps + # Make sure dep checking also works with just linearized kernel + unsatisfied_deps = lp.find_unsatisfied_dependencies(lin_knl) + + assert not unsatisfied_deps + # }}} # {{{ Check with incorrect loop nest order From 22173351d54b48060164967fd90c078364d377bd Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Wed, 26 May 2021 20:50:31 -0500 Subject: [PATCH 393/460] make sure dep checking works even when find_unsatisfied_deps doesn't receive linearization items separately from kernel --- test/test_linearization_checker.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/test/test_linearization_checker.py b/test/test_linearization_checker.py index bad5230d0..d4c3dd0b4 100644 --- a/test/test_linearization_checker.py +++ b/test/test_linearization_checker.py @@ -1366,8 +1366,13 @@ def _compare_dependencies(knl, deps_expected, return_unsatisfied=False): # Get unsatisfied deps lin_items, proc_knl, lin_knl = _process_and_linearize(knl) - return lp.find_unsatisfied_dependencies(proc_knl, lin_items) + unsatisfied_deps = lp.find_unsatisfied_dependencies(proc_knl, lin_items) + # Make sure dep checking also works with just linearized kernel + unsatisfied_deps_2 = lp.find_unsatisfied_dependencies(lin_knl) + assert len(unsatisfied_deps) == len(unsatisfied_deps_2) + + return unsatisfied_deps # }}} From 1b526408e02363d93f9cf822f19de29b27330053 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Tue, 1 Jun 2021 02:50:18 -0500 Subject: [PATCH 394/460] remove debugging assertion --- loopy/transform/subst.py | 16 ++++------------ 1 file changed, 4 insertions(+), 12 deletions(-) diff --git a/loopy/transform/subst.py b/loopy/transform/subst.py index e6e146567..12049d776 100644 --- a/loopy/transform/subst.py +++ b/loopy/transform/subst.py @@ -362,18 +362,10 @@ def get_relevant_definition_insn_id(usage_insn_id): definition_insn_ids.add(insn.id) definition_id_to_deps[insn.id] = deepcopy(insn.dependencies) - # TODO refactor after answering question: - # what's 
the difference between definition_insn_ids and - # set(usage_to_definition.values())? - if definition_insn_ids != set(usage_to_definition.values()): - print("="*80) - print("Apparently these are not equivalent after all. James was wrong.") - print("definition_insn_ids:") - print(definition_insn_ids) - print("set(usage_to_definition.values()):") - print(set(usage_to_definition.values())) - print("="*80) - assert False + # usage_to_definition maps each usage to the most recent assignment to the var, + # (most recent "definition"), + # so set(usage_to_definition.values()) is a subset of definition_insn_ids, + # which contains ALL the insns where the var is assigned # }}} From dbc88587ccfb735278f3088267605ec3422826bf Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Wed, 2 Jun 2021 10:20:33 -0500 Subject: [PATCH 395/460] handle case where subst definition is removed, was both a depender and a dependee, and the inames of the stmt that depended on it were different --- loopy/transform/instruction.py | 5 ++- loopy/transform/subst.py | 79 ++++++++++++++++++++++++++++++---- 2 files changed, 74 insertions(+), 10 deletions(-) diff --git a/loopy/transform/instruction.py b/loopy/transform/instruction.py index 7673cf685..50ddf8cc4 100644 --- a/loopy/transform/instruction.py +++ b/loopy/transform/instruction.py @@ -133,7 +133,8 @@ def map_stmt_dependencies(kernel, stmt_match, f): # Does not search for matching dependees of non-matching depender statements! 
def _update_deps(stmt): - new_deps = f(stmt.dependencies) + # pass stmt to f because might need info + new_deps = f(stmt.dependencies, stmt) return stmt.copy(dependencies=new_deps) return map_instructions(kernel, stmt_match, _update_deps) @@ -226,7 +227,7 @@ def add_dependency_v2( "cannot add dependency %s->%s" % (depends_on_id, depends_on_id, stmt_id)) - def _add_dep(stmt_deps): + def _add_dep(stmt_deps, stmt): # stmt_deps: dict mapping depends-on ids to dep maps stmt_deps.setdefault(depends_on_id, []).append(new_dependency) return stmt_deps diff --git a/loopy/transform/subst.py b/loopy/transform/subst.py index 12049d776..59424c04f 100644 --- a/loopy/transform/subst.py +++ b/loopy/transform/subst.py @@ -353,6 +353,11 @@ def get_relevant_definition_insn_id(usage_insn_id): usage_to_definition[insn.id] = def_id definition_to_usage_ids.setdefault(def_id, set()).add(insn.id) + # these insns may be removed so can't get within_inames later + definition_to_within_inames = {} + for def_id in definition_to_usage_ids.keys(): + definition_to_within_inames[def_id] = kernel.id_to_insn[def_id].within_inames + # Get deps for subst_def statements before any of them get removed definition_id_to_deps = {} from copy import deepcopy @@ -470,19 +475,77 @@ def get_relevant_definition_insn_id(usage_insn_id): unmatched_usage_ids = tts.unmatched_usage_sites_found[subst_def_id] matched_usage_ids = subst_usage_ids - unmatched_usage_ids if matched_usage_ids: + import islpy as isl + dt = isl.dim_type # Create match condition string: match_any_matched_usage_id = " or ".join( ["id:%s" % (usage_id) for usage_id in matched_usage_ids]) subst_def_deps_dict = definition_id_to_deps[subst_def_id] - - def _add_deps_to_stmt(dep_dict): - # dep_dict: prev dep dict for this stmt - # add the deps - for depends_on_id, dep_list in subst_def_deps_dict.items(): - dep_list_copy = deepcopy(dep_list) - dep_dict.setdefault(depends_on_id, []).extend(dep_list_copy) - return dep_dict + old_dep_out_inames = 
definition_to_within_inames[subst_def_id] + + def _add_deps_to_stmt(old_dep_dict, stmt): + # old_dep_dict: prev dep dict for this stmt + + # want to add old dep from def stmt to usage stmt, + # but if inames of def stmt don't match inames of usage stmt, + # need to get rid of unwanted inames in old dep out dims and add + # any missing inames (inames from usage stmt not present in def stmt) + new_dep_out_inames = stmt.within_inames + out_inames_to_project_out = old_dep_out_inames - new_dep_out_inames + out_inames_to_add = new_dep_out_inames - old_dep_out_inames + # inames_domain for new inames to add + dom_for_new_inames = kernel.get_inames_domain( + out_inames_to_add + ).project_out_except(out_inames_to_add, [dt.set]) + + # process and add the old deps + for depends_on_id, old_dep_list in subst_def_deps_dict.items(): + # pu.db + + new_dep_list = [] + for old_dep in old_dep_list: + # TODO figure out when copies are necessary + new_dep = deepcopy(old_dep) + + # project out inames from old dep (out dim) that don't apply + # to this statement + for old_iname in out_inames_to_project_out: + idx_of_old_iname = old_dep.find_dim_by_name( + dt.out, old_iname) + assert idx_of_old_iname != -1 + new_dep = new_dep.project_out( + dt.out, idx_of_old_iname, 1) + + # add inames from this stmt that were not present in old dep + from loopy.schedule.checker.utils import ( + add_and_name_isl_dims, + ) + new_dep = add_and_name_isl_dims( + new_dep, dt.out, out_inames_to_add) + + # add inames domain for new inames + """ + # insert stmt dim + from loopy.schedule.checker.utils import ( + reorder_dims_by_name, + insert_and_name_isl_dims, + ) + from loopy.schedule.checker.schedule import ( + STATEMENT_VAR_NAME, + ) + dom_for_new_iname = insert_and_name_isl_dims( + dom_for_new_iname, dt.set, [STATEMENT_VAR_NAME], 0) + """ + dom_aligned = isl.align_spaces( + dom_for_new_inames, new_dep.range()) + + # Intersect domain with dep + new_dep = new_dep.intersect_range(dom_aligned) + 
new_dep_list.append(new_dep) + + old_dep_dict.setdefault(depends_on_id, []).extend(new_dep_list) + return old_dep_dict kernel = map_stmt_dependencies( kernel, match_any_matched_usage_id, _add_deps_to_stmt) From 7075f7e9c264aa5713acb11437bdccb27ec80a3c Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Wed, 2 Jun 2021 10:21:23 -0500 Subject: [PATCH 396/460] test dep handling during assignment_to_subst in case where subst definition is removed, was both a depender and a dependee, and the inames of the stmt that depended on it were different --- test/test_linearization_checker.py | 130 +++++++++++++++++++---------- 1 file changed, 85 insertions(+), 45 deletions(-) diff --git a/test/test_linearization_checker.py b/test/test_linearization_checker.py index d4c3dd0b4..2fb264824 100644 --- a/test/test_linearization_checker.py +++ b/test/test_linearization_checker.py @@ -150,6 +150,49 @@ def _process_and_linearize(knl): # }}} +# {{{ Helper functions for dependency tests + + +def _compare_dependencies(knl, deps_expected, return_unsatisfied=False): + + deps_found = {} + for stmt in knl.instructions: + if hasattr(stmt, "dependencies") and stmt.dependencies: + deps_found[stmt.id] = stmt.dependencies + + assert deps_found.keys() == deps_expected.keys() + + for stmt_id_after, dep_dict_found in deps_found.items(): + + dep_dict_expected = deps_expected[stmt_id_after] + + # Ensure deps for stmt_id_after match + assert dep_dict_found.keys() == dep_dict_expected.keys() + + for stmt_id_before, dep_list_found in dep_dict_found.items(): + + # Ensure deps from (stmt_id_before -> stmt_id_after) match + dep_list_expected = dep_dict_expected[stmt_id_before] + print("comparing deps %s->%s" % (stmt_id_before, stmt_id_after)) + assert len(dep_list_found) == len(dep_list_expected) + _align_and_compare_maps(zip(dep_list_found, dep_list_expected)) + + if not return_unsatisfied: + return + + # Get unsatisfied deps + lin_items, proc_knl, lin_knl = _process_and_linearize(knl) + unsatisfied_deps = 
lp.find_unsatisfied_dependencies(proc_knl, lin_items) + + # Make sure dep checking also works with just linearized kernel + unsatisfied_deps_2 = lp.find_unsatisfied_dependencies(lin_knl) + assert len(unsatisfied_deps) == len(unsatisfied_deps_2) + + return unsatisfied_deps + +# }}} + + # {{{ test_intra_thread_pairwise_schedule_creation() def test_intra_thread_pairwise_schedule_creation(): @@ -1335,48 +1378,6 @@ def test_sios_with_matmul(): # {{{ Dependency tests -# {{{ Helper functions - - -def _compare_dependencies(knl, deps_expected, return_unsatisfied=False): - - deps_found = {} - for stmt in knl.instructions: - if hasattr(stmt, "dependencies") and stmt.dependencies: - deps_found[stmt.id] = stmt.dependencies - - assert deps_found.keys() == deps_expected.keys() - - for stmt_id_after, dep_dict_found in deps_found.items(): - - dep_dict_expected = deps_expected[stmt_id_after] - - # Ensure deps for stmt_id_after match - assert dep_dict_found.keys() == dep_dict_expected.keys() - - for stmt_id_before, dep_list_found in dep_dict_found.items(): - - # Ensure deps from (stmt_id_before -> stmt_id_after) match - dep_list_expected = dep_dict_expected[stmt_id_before] - assert len(dep_list_found) == len(dep_list_expected) - _align_and_compare_maps(zip(dep_list_found, dep_list_expected)) - - if not return_unsatisfied: - return - - # Get unsatisfied deps - lin_items, proc_knl, lin_knl = _process_and_linearize(knl) - unsatisfied_deps = lp.find_unsatisfied_dependencies(proc_knl, lin_items) - - # Make sure dep checking also works with just linearized kernel - unsatisfied_deps_2 = lp.find_unsatisfied_dependencies(lin_knl) - assert len(unsatisfied_deps) == len(unsatisfied_deps_2) - - return unsatisfied_deps - -# }}} - - # {{{ Dependency creation and checking (without transformations) # {{{ test_add_dependency_v2 @@ -1660,9 +1661,6 @@ def test_assignment_to_subst_with_dependencies(): """) # TODO test with multiple subst definition sites - # TODO what if stmt2 depends on <>tsq = 
b[i-1]**2 and then we do - # assignment to subst? remove i'=i from dep? - # TODO what if, e.g., stmt3 doesn't have iname i in it? knl = lp.add_and_infer_dtypes(knl, {"a": np.float32}) dep_eq = _isl_map_with_marked_dims( @@ -1740,6 +1738,48 @@ def test_assignment_to_subst_with_dependencies(): assert not unsatisfied_deps + # test case where subst def is removed, has deps, and + # inames of subst_def don't match subst usage + + knl = lp.make_kernel( + "{[i,j,k,m]: 0 <= i,j,k,m < n}", + """ + for i,j + <>temp0 = 0.1*i {id=stmt0} + end + for k + <>tsq = temp0**2 {id=stmt1,dep=stmt0} + end + for m + <>res = 23*tsq + 25*tsq {id=stmt2,dep=stmt1} + end + """) + knl = lp.add_and_infer_dtypes(knl, {"temp0,tsq,res": np.float32}) + + dep_1_on_0 = make_dep_map( + "[n] -> { [i', j']->[k] : 0 <= i',j',k < n }", self_dep=False) + dep_2_on_1 = make_dep_map( + "[n] -> { [k']->[m] : 0 <= k',m < n }", self_dep=False) + + from copy import deepcopy + knl = lp.add_dependency_v2(knl, "stmt1", "stmt0", deepcopy(dep_1_on_0)) + knl = lp.add_dependency_v2(knl, "stmt2", "stmt1", deepcopy(dep_2_on_1)) + + knl = lp.assignment_to_subst(knl, "tsq") + + dep_exp = make_dep_map( + "[n] -> { [i', j']->[m] : 0 <= i',j',m < n }", self_dep=False) + + # Compare deps and make sure they are satisfied + unsatisfied_deps = _compare_dependencies( + knl, + { + "stmt2": {"stmt0": [dep_exp, ]}, + }, + return_unsatisfied=True) + + assert not unsatisfied_deps + # }}} From 1700007f7d8973276dd28cc658c808713cd8f0a6 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Wed, 2 Jun 2021 10:38:28 -0500 Subject: [PATCH 397/460] remove old code --- loopy/transform/subst.py | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/loopy/transform/subst.py b/loopy/transform/subst.py index 59424c04f..3b1f13fa1 100644 --- a/loopy/transform/subst.py +++ b/loopy/transform/subst.py @@ -525,18 +525,6 @@ def _add_deps_to_stmt(old_dep_dict, stmt): new_dep, dt.out, out_inames_to_add) # add inames domain for new inames - """ - # insert 
stmt dim - from loopy.schedule.checker.utils import ( - reorder_dims_by_name, - insert_and_name_isl_dims, - ) - from loopy.schedule.checker.schedule import ( - STATEMENT_VAR_NAME, - ) - dom_for_new_iname = insert_and_name_isl_dims( - dom_for_new_iname, dt.set, [STATEMENT_VAR_NAME], 0) - """ dom_aligned = isl.align_spaces( dom_for_new_inames, new_dep.range()) From 7b58c4fb1295b4395311579eba376ec0b6696837 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Wed, 2 Jun 2021 14:21:22 -0500 Subject: [PATCH 398/460] add option to find_and_rename_dim to assert that old_name exists --- loopy/schedule/checker/utils.py | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/loopy/schedule/checker/utils.py b/loopy/schedule/checker/utils.py index bf38a435b..0709b2ff1 100644 --- a/loopy/schedule/checker/utils.py +++ b/loopy/schedule/checker/utils.py @@ -156,11 +156,18 @@ def add_eq_isl_constraint_from_names(isl_map, var1, var2): {1: 0, var1: 1, var2: -1})) -def find_and_rename_dim(old_map, dim_types, old_name, new_name): +def find_and_rename_dim(old_map, dim_types, old_name, new_name, must_exist=False): new_map = old_map.copy() for dim_type in dim_types: - new_map = new_map.set_dim_name( - dim_type, new_map.find_dim_by_name(dim_type, old_name), new_name) + idx = new_map.find_dim_by_name(dim_type, old_name) + if idx == -1: + if must_exist: + raise ValueError( + "must_exist=True but did not find old_name %s in %s" + % (old_name, old_map)) + else: + continue + new_map = new_map.set_dim_name(dim_type, idx, new_name) return new_map From 0e1b09c5da9296e80b5b88ecd3e5f451f74c2496 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Wed, 2 Jun 2021 14:21:53 -0500 Subject: [PATCH 399/460] update dependencies in rename_iname() --- loopy/transform/iname.py | 48 ++++++++++++++++++++++++++++++++++++---- 1 file changed, 44 insertions(+), 4 deletions(-) diff --git a/loopy/transform/iname.py b/loopy/transform/iname.py index 4bc5394db..473419580 100644 --- a/loopy/transform/iname.py 
+++ b/loopy/transform/iname.py @@ -975,19 +975,21 @@ def duplicate_inames(kernel, inames, within, new_inames=None, suffix=None, # {{{ *Rename* iname in dependencies + # TODO use find_and_rename_dims for simpler code + # (see example in rename_iname) from loopy.transform.instruction import map_dependency_maps from loopy.schedule.checker.schedule import BEFORE_MARK old_iname_p = old_iname+BEFORE_MARK new_iname_p = new_iname+BEFORE_MARK - def _rename_iname_in_dep_out(dep): + def _rename_iname_in_dim_out(dep): # update iname in out-dim out_idx = dep.find_dim_by_name(dt.out, old_iname) if out_idx != -1: dep = dep.set_dim_name(dt.out, out_idx, new_iname) return dep - def _rename_iname_in_dep_in(dep): + def _rename_iname_in_dim_in(dep): # update iname in in-dim in_idx = dep.find_dim_by_name(dt.in_, old_iname_p) if in_idx != -1: @@ -998,10 +1000,10 @@ def _rename_iname_in_dep_in(dep): # TODO figure out match vs stack_match false_id_match = "not id:*" kernel = map_dependency_maps( - kernel, _rename_iname_in_dep_out, + kernel, _rename_iname_in_dim_out, stmt_match_depender=within, stmt_match_dependee=false_id_match) kernel = map_dependency_maps( - kernel, _rename_iname_in_dep_in, + kernel, _rename_iname_in_dim_in, stmt_match_depender=false_id_match, stmt_match_dependee=within) # }}} @@ -1270,6 +1272,44 @@ def rename_iname(kernel, old_iname, new_iname, existing_ok=False, within=None): kernel = kernel.copy(instructions=new_instructions) + # {{{ Rename iname in dependencies + + from loopy.transform.instruction import map_dependency_maps + from loopy.schedule.checker.schedule import BEFORE_MARK + from loopy.schedule.checker.utils import ( + find_and_rename_dims, + ) + old_iname_p = old_iname+BEFORE_MARK + new_iname_p = new_iname+BEFORE_MARK + + def _rename_iname_in_dim_out(dep): + # Update iname in out-dim (depender dim). 
+ + # For now, out_idx should not be -1 because this will only + # be called on dependers + return find_and_rename_dims( + dep, [dt.out], old_iname, new_iname, must_exist=True) + + def _rename_iname_in_dim_in(dep): + # Update iname in in-dim (dependee dim). + + # For now, out_idx should not be -1 because this will only + # be called on dependees + return find_and_rename_dims( + dep, [dt.in_], old_iname_p, new_iname_p, must_exist=True) + + # TODO figure out proper way to match none + # TODO figure out match vs stack_match + false_id_match = "not id:*" + kernel = map_dependency_maps( + kernel, _rename_iname_in_dim_out, + stmt_match_depender=within, stmt_match_dependee=false_id_match) + kernel = map_dependency_maps( + kernel, _rename_iname_in_dim_in, + stmt_match_depender=false_id_match, stmt_match_dependee=within) + + # }}} + else: kernel = duplicate_inames( kernel, [old_iname], within=within, new_inames=[new_iname]) From b38681c34643cc1d220ac4f2d256aa282654a925 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Wed, 2 Jun 2021 14:22:13 -0500 Subject: [PATCH 400/460] test updating of dependencies during rename_iname() --- test/test_linearization_checker.py | 65 ++++++++++++++++++++++++++++++ 1 file changed, 65 insertions(+) diff --git a/test/test_linearization_checker.py b/test/test_linearization_checker.py index 2fb264824..8c35b796d 100644 --- a/test/test_linearization_checker.py +++ b/test/test_linearization_checker.py @@ -1868,6 +1868,71 @@ def test_duplicate_inames_with_dependencies(): # }}} +# {{{ test_rename_inames_with_dependencies + +def test_rename_inames_with_dependencies(): + # When rename_iname is called and the new iname + # *doesn't* already exist, then duplicate_inames is called, + # and we test that elsewhere. Here we test the case where + # rename_iname is called and the new iname already exists. 
+ + knl = lp.make_kernel( + "{[i,j,m]: 0 <= i,j,m < n}", + """ + b[i,j] = a[i,j] {id=stmtb} + c[i,j] = a[i,j] {id=stmtc,dep=stmtb} + d[m] = 5.5 {id=stmtd,dep=stmtc} + """) + knl = lp.add_and_infer_dtypes(knl, {"a,d": np.float32}) + + dep_c_on_b = make_dep_map( + "[n] -> { [i', j']->[i, j] : 0 <= i,i',j,j' < n and i' = i and j' = j }", + self_dep=False) + dep_c_on_c = make_dep_map( + "[n] -> { [i', j']->[i, j] : 0 <= i,i',j,j' < n and i' < i and j' < j }", + self_dep=True) + dep_d_on_c = make_dep_map( + "[n] -> { [i', j']->[m] : 0 <= m,i',j' < n }", + self_dep=False) + + # Create dep stmtb->stmtc + knl = lp.add_dependency_v2(knl, "stmtc", "stmtb", dep_c_on_b) + knl = lp.add_dependency_v2(knl, "stmtc", "stmtc", dep_c_on_c) + knl = lp.add_dependency_v2(knl, "stmtd", "stmtc", dep_d_on_c) + + # {{{ Duplicate j within stmtc + + knl = lp.rename_iname( + knl, "j", "j_new", within="id:stmtc", existing_ok=True) + + dep_c_on_b_exp = make_dep_map( + "[n] -> { [i', j']->[i, j_new] : " + "0 <= i,i',j_new,j' < n and i' = i and j' = j_new}", + self_dep=False) + dep_c_on_c_exp = make_dep_map( + "[n] -> { [i', j_new']->[i, j_new] : " + "0 <= i,i',j_new,j_new' < n and i' < i and j_new' < j_new }", + self_dep=True) + dep_d_on_c_exp = make_dep_map( + "[n] -> { [i', j_new']->[m] : 0 <= m,i',j_new' < n }", + self_dep=False) + + # Compare deps and make sure they are satisfied + unsatisfied_deps = _compare_dependencies( + knl, + { + "stmtc": {"stmtb": [dep_c_on_b_exp, ], "stmtc": [dep_c_on_c_exp, ]}, + "stmtd": {"stmtc": [dep_d_on_c_exp, ]}, + }, + return_unsatisfied=True) + + assert not unsatisfied_deps + + # }}} + +# }}} + + # {{{ test_split_iname_with_dependencies def test_split_iname_with_dependencies(): From 77bed8f39563822d97b5c5dea1071928d50401dd Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Wed, 2 Jun 2021 16:03:32 -0500 Subject: [PATCH 401/460] add TODO --- loopy/transform/instruction.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/loopy/transform/instruction.py 
b/loopy/transform/instruction.py index 8b539ca5f..01dd95d84 100644 --- a/loopy/transform/instruction.py +++ b/loopy/transform/instruction.py @@ -138,6 +138,8 @@ def add_dependency_v2( later. """ + # TODO make this accept multiple deps and/or multiple stmts so that + # these can be added in fewer passes through the instructions if stmt_id not in kernel.id_to_insn: raise LoopyError("no instructions found matching '%s'," From 493c8e8f707d5388dd93cfadd90108a9013e99ce Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Wed, 2 Jun 2021 16:25:51 -0500 Subject: [PATCH 402/460] make make_dep_map smart enough to get relevant iname domains from kernel if provided --- loopy/schedule/checker/utils.py | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/loopy/schedule/checker/utils.py b/loopy/schedule/checker/utils.py index 86bf935a4..152dc56a1 100644 --- a/loopy/schedule/checker/utils.py +++ b/loopy/schedule/checker/utils.py @@ -138,7 +138,7 @@ def append_mark_to_strings(strings, mark): # {{{ make_dep_map -def make_dep_map(s, self_dep=False): +def make_dep_map(s, self_dep=False, knl_with_domains=None): # TODO put this function in the right place @@ -176,6 +176,23 @@ def make_dep_map(s, self_dep=False): map_with_stmts.space, {1: sid_after, STATEMENT_VAR_NAME: -1})) + if knl_with_domains is not None: + # intersect map with knl domains + inames = map_init.get_var_names(dt.out) + inames_dom = knl_with_domains.get_inames_domain( + inames).project_out_except(inames, [dt.set]) + inames_dom_marked = append_mark_to_isl_map_var_names( + inames_dom, dt.set, BEFORE_MARK) + + inames_dom_aligned = isl.align_spaces( + inames_dom, map_with_stmts.range()) + inames_dom_marked_aligned = isl.align_spaces( + inames_dom_marked, map_with_stmts.domain()) + + map_with_stmts = map_with_stmts.intersect_range( + inames_dom_aligned + ).intersect_domain(inames_dom_marked_aligned) + return map_with_stmts # }}} From d7e934248a0ea03b017570cacb9d8a8454a6ac94 Mon Sep 17 00:00:00 2001 
From: jdsteve2 Date: Wed, 2 Jun 2021 16:26:20 -0500 Subject: [PATCH 403/460] some initial tests for make_dep_map --- test/test_linearization_checker.py | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) diff --git a/test/test_linearization_checker.py b/test/test_linearization_checker.py index 0bddbc973..f46c0abd1 100644 --- a/test/test_linearization_checker.py +++ b/test/test_linearization_checker.py @@ -1363,12 +1363,19 @@ def test_add_dependency_v2(): # Add a dependency to stmt_b dep_b_on_a = make_dep_map( "[pi] -> {{ [i'] -> [i] : i > i' " + "and {0} }}".format(assumptions_str), + self_dep=False, knl_with_domains=knl) + + # test make_dep_map while we're here: + dep_b_on_a_test = _isl_map_with_marked_dims( + "[pi] -> {{ [{3}'=0, i'] -> [{3}=1, i] : i > i' " "and {0} and {1} and {2} }}".format( i_range_str, i_range_str_p, assumptions_str, - ), - self_dep=False) + STATEMENT_VAR_NAME, + )) + _align_and_compare_maps([(dep_b_on_a, dep_b_on_a_test)]) knl = lp.add_dependency_v2(knl, "stmt_b", "stmt_a", dep_b_on_a) @@ -1383,12 +1390,19 @@ def test_add_dependency_v2(): # Add a second dependency to stmt_b dep_b_on_a_2 = make_dep_map( "[pi] -> {{ [i'] -> [i] : i = i' " + "and {0}}}".format(assumptions_str), + self_dep=False, knl_with_domains=knl) + + # test make_dep_map while we're here: + dep_b_on_a_2_test = _isl_map_with_marked_dims( + "[pi] -> {{ [{3}'=0, i'] -> [{3}=1, i] : i = i' " "and {0} and {1} and {2} }}".format( i_range_str, i_range_str_p, assumptions_str, - ), - self_dep=False) + STATEMENT_VAR_NAME, + )) + _align_and_compare_maps([(dep_b_on_a_2, dep_b_on_a_2_test)]) knl = lp.add_dependency_v2(knl, "stmt_b", "stmt_a", dep_b_on_a_2) From 482341f27384dad9c9018a2b96af1440df8a2359 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Wed, 2 Jun 2021 17:03:46 -0500 Subject: [PATCH 404/460] deal with case where someone gives us a StackMatch when mapping dependencies --- loopy/transform/instruction.py | 31 ++++++++++++++++++++++++++----- 1 file 
changed, 26 insertions(+), 5 deletions(-) diff --git a/loopy/transform/instruction.py b/loopy/transform/instruction.py index ffb8877a3..d47d40d17 100644 --- a/loopy/transform/instruction.py +++ b/loopy/transform/instruction.py @@ -141,9 +141,15 @@ def _update_deps(stmt): def _parse_match_if_necessary(match_candidate): - from loopy.match import MatchExpressionBase - if not isinstance(match_candidate, MatchExpressionBase): + from loopy.match import ( + MatchExpressionBase, + StackMatch, + ) + if not isinstance( + match_candidate, (MatchExpressionBase, StackMatch)): from loopy.match import parse_match + # TODO assumes StackMatches are already parsed + # TODO determine when to use parse_stack_match (AKQ) return parse_match(match_candidate) else: return match_candidate @@ -155,15 +161,28 @@ def map_dependency_lists( # All deps of stmts matching stmt_match_depender # All deps ON stmts matching stmt_match_dependee # (but doesn't call f() twice if dep matches both depender and dependee) + from loopy.match import ( + StackMatch, + ) match_depender = _parse_match_if_necessary(stmt_match_depender) match_dependee = _parse_match_if_necessary(stmt_match_dependee) - new_stmts = [] + # TODO figure out right way to simultaneously handle + # both MatchExpressionBase and StackMatch + if isinstance(match_depender, StackMatch): + extra_match_depender_args = [()] + else: + extra_match_depender_args = [] + if isinstance(match_dependee, StackMatch): + extra_match_dependee_args = [()] + else: + extra_match_dependee_args = [] + new_stmts = [] for stmt in kernel.instructions: new_deps = {} - if match_depender(kernel, stmt): + if match_depender(kernel, stmt, *extra_match_depender_args): # Stmt matches as depender # Replace all deps for dep_id, dep_maps in stmt.dependencies.items(): @@ -172,7 +191,9 @@ def map_dependency_lists( # Stmt didn't match as a depender # Replace deps matching dependees for dep_id, dep_maps in stmt.dependencies.items(): - if match_dependee(kernel, 
kernel.id_to_insn[dep_id]): + if match_dependee( + kernel, kernel.id_to_insn[dep_id], + *extra_match_dependee_args): new_deps[dep_id] = f(dep_maps) else: new_deps[dep_id] = dep_maps From f4d5a1afd2343482693e433350a0cd18f06688af Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Wed, 2 Jun 2021 17:04:16 -0500 Subject: [PATCH 405/460] fix typo --- loopy/transform/iname.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/loopy/transform/iname.py b/loopy/transform/iname.py index 473419580..b06181b5f 100644 --- a/loopy/transform/iname.py +++ b/loopy/transform/iname.py @@ -975,7 +975,7 @@ def duplicate_inames(kernel, inames, within, new_inames=None, suffix=None, # {{{ *Rename* iname in dependencies - # TODO use find_and_rename_dims for simpler code + # TODO use find_and_rename_dim for simpler code # (see example in rename_iname) from loopy.transform.instruction import map_dependency_maps from loopy.schedule.checker.schedule import BEFORE_MARK @@ -1277,7 +1277,7 @@ def rename_iname(kernel, old_iname, new_iname, existing_ok=False, within=None): from loopy.transform.instruction import map_dependency_maps from loopy.schedule.checker.schedule import BEFORE_MARK from loopy.schedule.checker.utils import ( - find_and_rename_dims, + find_and_rename_dim, ) old_iname_p = old_iname+BEFORE_MARK new_iname_p = new_iname+BEFORE_MARK @@ -1287,7 +1287,7 @@ def _rename_iname_in_dim_out(dep): # For now, out_idx should not be -1 because this will only # be called on dependers - return find_and_rename_dims( + return find_and_rename_dim( dep, [dt.out], old_iname, new_iname, must_exist=True) def _rename_iname_in_dim_in(dep): @@ -1295,7 +1295,7 @@ def _rename_iname_in_dim_in(dep): # For now, out_idx should not be -1 because this will only # be called on dependees - return find_and_rename_dims( + return find_and_rename_dim( dep, [dt.in_], old_iname_p, new_iname_p, must_exist=True) # TODO figure out proper way to match none From 3e498fa04ea7a10fdcdfaa0c85a4b2c00eae4244 Mon Sep 
17 00:00:00 2001 From: jdsteve2 Date: Wed, 2 Jun 2021 17:05:08 -0500 Subject: [PATCH 406/460] make test_rename_inames_with_dependencies() actually test the case where the new iname already exists --- test/test_linearization_checker.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/test/test_linearization_checker.py b/test/test_linearization_checker.py index 2cb6748e0..7056f3aa9 100644 --- a/test/test_linearization_checker.py +++ b/test/test_linearization_checker.py @@ -1891,10 +1891,11 @@ def test_rename_inames_with_dependencies(): # rename_iname is called and the new iname already exists. knl = lp.make_kernel( - "{[i,j,m]: 0 <= i,j,m < n}", + "{[i,j,m,j_new]: 0 <= i,j,m,j_new < n}", """ b[i,j] = a[i,j] {id=stmtb} c[i,j] = a[i,j] {id=stmtc,dep=stmtb} + e[i,j_new] = 1.1 d[m] = 5.5 {id=stmtd,dep=stmtc} """) knl = lp.add_and_infer_dtypes(knl, {"a,d": np.float32}) @@ -1914,7 +1915,7 @@ def test_rename_inames_with_dependencies(): knl = lp.add_dependency_v2(knl, "stmtc", "stmtc", dep_c_on_c) knl = lp.add_dependency_v2(knl, "stmtd", "stmtc", dep_d_on_c) - # {{{ Duplicate j within stmtc + # Rename j within stmtc knl = lp.rename_iname( knl, "j", "j_new", within="id:stmtc", existing_ok=True) @@ -1942,8 +1943,6 @@ def test_rename_inames_with_dependencies(): assert not unsatisfied_deps - # }}} - # }}} From c421d3813f5b82e14fa0b0ae64a50334cbbb1ecd Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Thu, 3 Jun 2021 15:39:35 -0500 Subject: [PATCH 407/460] add within_inames arg to add_barrier --- loopy/transform/add_barrier.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/loopy/transform/add_barrier.py b/loopy/transform/add_barrier.py index bc324d7fa..922616dd2 100644 --- a/loopy/transform/add_barrier.py +++ b/loopy/transform/add_barrier.py @@ -35,7 +35,8 @@ # {{{ add_barrier def add_barrier(kernel, insn_before="", insn_after="", id_based_on=None, - tags=None, synchronization_kind="global", mem_kind=None): + tags=None, 
synchronization_kind="global", mem_kind=None, + within_inames=None): """Takes in a kernel that needs to be added a barrier and returns a kernel which has a barrier inserted into it. It takes input of 2 instructions and then adds a barrier in between those 2 instructions. The expressions can @@ -49,8 +50,11 @@ def add_barrier(kernel, insn_before="", insn_after="", id_based_on=None, :arg tags: The tag of the group to which the barrier must be added :arg synchronization_kind: Kind of barrier to be added. May be "global" or "local" - :arg kind: Type of memory to be synchronied. May be "global" or "local". Ignored - for "global" bariers. If not supplied, defaults to *synchronization_kind* + :arg kind: Type of memory to be synchronized. May be "global" or "local". Ignored + for "global" barriers. If not supplied, defaults to *synchronization_kind* + :arg within_inames: A :class:`frozenset` of inames identifying the loops + within which the barrier will be executed. + """ if mem_kind is None: @@ -69,6 +73,7 @@ def add_barrier(kernel, insn_before="", insn_after="", id_based_on=None, barrier_to_add = BarrierInstruction(depends_on=frozenset(insn_before_list), depends_on_is_final=True, id=id, + within_inames=within_inames, tags=tags, synchronization_kind=synchronization_kind, mem_kind=mem_kind) From fb4c43d02eb0e3151dd791db3002531cb9337184 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Sat, 5 Jun 2021 21:06:48 -0500 Subject: [PATCH 408/460] fix make_dep_map so it handles automatic domain creation correctly when input dims don't match output dims --- loopy/schedule/checker/utils.py | 28 +++++++++++++++++----------- 1 file changed, 17 insertions(+), 11 deletions(-) diff --git a/loopy/schedule/checker/utils.py b/loopy/schedule/checker/utils.py index 152dc56a1..85cc9ec59 100644 --- a/loopy/schedule/checker/utils.py +++ b/loopy/schedule/checker/utils.py @@ -178,20 +178,26 @@ def make_dep_map(s, self_dep=False, knl_with_domains=None): if knl_with_domains is not None: # intersect map 
with knl domains - inames = map_init.get_var_names(dt.out) - inames_dom = knl_with_domains.get_inames_domain( - inames).project_out_except(inames, [dt.set]) - inames_dom_marked = append_mark_to_isl_map_var_names( - inames_dom, dt.set, BEFORE_MARK) + inames_in = map_init.get_var_names(dt.in_) + inames_out = map_init.get_var_names(dt.out) - inames_dom_aligned = isl.align_spaces( - inames_dom, map_with_stmts.range()) - inames_dom_marked_aligned = isl.align_spaces( - inames_dom_marked, map_with_stmts.domain()) + inames_in_dom = knl_with_domains.get_inames_domain( + inames_in).project_out_except(inames_in, [dt.set]) + inames_out_dom = knl_with_domains.get_inames_domain( + inames_out).project_out_except(inames_out, [dt.set]) + + # mark dependee inames + inames_in_dom_marked = append_mark_to_isl_map_var_names( + inames_in_dom, dt.set, BEFORE_MARK) + + inames_in_dom_marked_aligned = isl.align_spaces( + inames_in_dom_marked, map_with_stmts.domain()) + inames_out_dom_aligned = isl.align_spaces( + inames_out_dom, map_with_stmts.range()) map_with_stmts = map_with_stmts.intersect_range( - inames_dom_aligned - ).intersect_domain(inames_dom_marked_aligned) + inames_out_dom_aligned + ).intersect_domain(inames_in_dom_marked_aligned) return map_with_stmts From 473116c19f6c8c686052e13d235c2e6ea25e0608 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Sat, 5 Jun 2021 21:07:09 -0500 Subject: [PATCH 409/460] add dedicated test for make_dep_map --- test/test_linearization_checker.py | 50 ++++++++++++++++++++++++++++++ 1 file changed, 50 insertions(+) diff --git a/test/test_linearization_checker.py b/test/test_linearization_checker.py index f46c0abd1..013ded908 100644 --- a/test/test_linearization_checker.py +++ b/test/test_linearization_checker.py @@ -1466,6 +1466,56 @@ def test_add_dependency_v2(): # }}} +# {{{ test_make_dep_map + +def test_make_dep_map(): + # This is also tested inside other test functions, but + # here we specifically test case where the statement inames + # don't match + 
+ # Make kernel and use OLD deps to control linearization order for now + i_range_str = "0 <= i < n" + i_range_str_p = "0 <= i' < n" + j_range_str = "0 <= j < n" + j_range_str_p = "0 <= j' < n" + k_range_str = "0 <= k < n" + # k_range_str_p = "0 <= k' < n" # (not used) + knl = lp.make_kernel( + "{[i,j,k]: %s}" % (" and ".join([i_range_str, j_range_str, k_range_str])), + """ + a[i,j] = 3.14 {id=stmt_a} + b[k] = a[i,k] {id=stmt_b, dep=stmt_a} + """, + name="example", + lang_version=(2018, 2) + ) + knl = lp.add_and_infer_dtypes(knl, {"a,b": np.float32}) + + for stmt in knl.instructions: + assert not stmt.dependencies + + # Add a dependency to stmt_b + dep_b_on_a = make_dep_map( + "[n] -> { [i',j'] -> [i,k] : i > i' and j' < k}", + self_dep=False, knl_with_domains=knl) + + # Create expected dep + dep_b_on_a_test = _isl_map_with_marked_dims( + "[n] -> {{ [{0}'=0, i', j'] -> [{0}=1, i, k] : i > i' and j' < k" + " and {1} }}".format( + STATEMENT_VAR_NAME, + " and ".join([ + i_range_str, + i_range_str_p, + j_range_str_p, + k_range_str, + ]) + )) + _align_and_compare_maps([(dep_b_on_a, dep_b_on_a_test)]) + +# }}} + + # {{{ test_new_dependencies_finite_diff: def test_new_dependencies_finite_diff(): From 871ccaeed9bafcd8ba99e490ea264456a7e9916b Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Thu, 10 Jun 2021 16:42:38 -0500 Subject: [PATCH 410/460] don't check deps on/by barriers at the moment --- loopy/schedule/checker/__init__.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/loopy/schedule/checker/__init__.py b/loopy/schedule/checker/__init__.py index 99e3ffb90..8c0ab9c52 100644 --- a/loopy/schedule/checker/__init__.py +++ b/loopy/schedule/checker/__init__.py @@ -214,11 +214,16 @@ def find_unsatisfied_dependencies( # (stmt_id_before2, stmt_id_after2): [dep1, dep2, ...], # ...} + from loopy.kernel.instruction import BarrierInstruction + # TODO (fix) for now, don't check deps on/by barriers for stmt_after in knl.instructions: - for before_id, 
dep_list in stmt_after.dependencies.items(): - # (don't compare dep maps to maps found; duplicate deps should be rare) - stmt_pairs_to_deps.setdefault( - (before_id, stmt_after.id), []).extend(dep_list) + if not isinstance(stmt_after, BarrierInstruction): + for before_id, dep_list in stmt_after.dependencies.items(): + if not isinstance(knl.id_to_insn[before_id], BarrierInstruction): + # (don't compare dep maps to maps found; + # duplicate deps should be rare) + stmt_pairs_to_deps.setdefault( + (before_id, stmt_after.id), []).extend(dep_list) # }}} # {{{ Get statement instance orderings From 98544118b68d7487f3b64f4f104e2e710d4bca5e Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Thu, 10 Jun 2021 16:48:05 -0500 Subject: [PATCH 411/460] add v2 deps to barriers (still need to add deps on barriers) --- loopy/transform/add_barrier.py | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/loopy/transform/add_barrier.py b/loopy/transform/add_barrier.py index f4f3f2d3a..617263858 100644 --- a/loopy/transform/add_barrier.py +++ b/loopy/transform/add_barrier.py @@ -85,10 +85,37 @@ def add_barrier(kernel, insn_before="", insn_after="", id_based_on=None, new_kernel = kernel.copy(instructions=kernel.instructions + [barrier_to_add]) if insn_after is not None: + # TODO this should be a new dependency new_kernel = add_dependency(kernel=new_kernel, insn_match=insn_after, depends_on="id:"+id) + for insn_before_id in insns_before: + # make v2 dep: + from loopy.schedule.checker.utils import ( + append_mark_to_strings, + make_dep_map, + ) + from loopy.schedule.checker.schedule import BEFORE_MARK + inames_before = new_kernel.id_to_insn[insn_before_id].within_inames + inames_before_marked = append_mark_to_strings( + inames_before, BEFORE_MARK) + + inames_after = set(within_inames) if within_inames else set() + + shared_inames = inames_after & inames_before + + in_space_str = ", ".join(inames_before_marked) + out_space_str = ", ".join(inames_after) + constraint_str = 
" and ".join([ + "{0}{1} = {0}".format(iname, BEFORE_MARK) for iname in shared_inames]) + + dep_v2 = make_dep_map( + f"{{ [{in_space_str}] -> [{out_space_str}] : {constraint_str} }}", + knl_with_domains=new_kernel) + from loopy import add_dependency_v2 + new_kernel = add_dependency_v2(new_kernel, id, insn_before_id, dep_v2) + return new_kernel # }}} From bde5faffce5e78b5ecf478a8f469666fa4cdc527 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Thu, 10 Jun 2021 17:20:09 -0500 Subject: [PATCH 412/460] (temporary fix) if using v2-deps for linearization, don't error in append_barrier_or_raise_error() --- loopy/schedule/__init__.py | 37 +++++++++++++++++++++++-------------- 1 file changed, 23 insertions(+), 14 deletions(-) diff --git a/loopy/schedule/__init__.py b/loopy/schedule/__init__.py index 7009d182a..56bd0745a 100644 --- a/loopy/schedule/__init__.py +++ b/loopy/schedule/__init__.py @@ -1829,20 +1829,28 @@ def _insn_ids_reaching_end(schedule, kind, reverse): return insn_ids_alive_at_scope[-1] -def append_barrier_or_raise_error(schedule, dep, verify_only): +def append_barrier_or_raise_error( + schedule, dep, verify_only, use_dependencies_v2=False): if verify_only: - from loopy.diagnostic import MissingBarrierError - raise MissingBarrierError( - "Dependency '%s' (for variable '%s') " - "requires synchronization " - "by a %s barrier (add a 'no_sync_with' " - "instruction option to state that no " - "synchronization is needed)" - % ( - dep.dep_descr.format( - tgt=dep.target.id, src=dep.source.id), - dep.variable, - dep.var_kind)) + err_str = ( + "Dependency '%s' (for variable '%s') " + "requires synchronization " + "by a %s barrier (add a 'no_sync_with' " + "instruction option to state that no " + "synchronization is needed)" + % ( + dep.dep_descr.format( + tgt=dep.target.id, src=dep.source.id), + dep.variable, + dep.var_kind)) + # TODO need to update all this with v2 deps. For now, make this a warning. 
+ # Do full fix for this later + if use_dependencies_v2: + from warnings import warn + warn(err_str) + else: + from loopy.diagnostic import MissingBarrierError + raise MissingBarrierError(err_str) else: comment = "for {} ({})".format( dep.variable, dep.dep_descr.format( @@ -1909,7 +1917,8 @@ def insert_barriers_at_outer_level(schedule, reverse=False): for dep in chain.from_iterable( dep_tracker.gen_dependencies_with_target_at(insn) for insn in loop_head): - append_barrier_or_raise_error(result, dep, verify_only) + append_barrier_or_raise_error( + result, dep, verify_only, kernel.options.use_dependencies_v2) # This barrier gets inserted outside the loop, hence it is # executed unconditionally and so kills all sources before # the loop. From a8ec66d7357c494214f94140c92ba9ce2c67ab52 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Thu, 10 Jun 2021 18:07:05 -0500 Subject: [PATCH 413/460] set obj_bigger_ok=True when aligning inames domain with dep in make_dep_map --- loopy/schedule/checker/utils.py | 7 +++++-- loopy/target/c/compyte | 2 +- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/loopy/schedule/checker/utils.py b/loopy/schedule/checker/utils.py index 85cc9ec59..4bba569e8 100644 --- a/loopy/schedule/checker/utils.py +++ b/loopy/schedule/checker/utils.py @@ -190,10 +190,13 @@ def make_dep_map(s, self_dep=False, knl_with_domains=None): inames_in_dom_marked = append_mark_to_isl_map_var_names( inames_in_dom, dt.set, BEFORE_MARK) + # align spaces adds the stmt var inames_in_dom_marked_aligned = isl.align_spaces( - inames_in_dom_marked, map_with_stmts.domain()) + inames_in_dom_marked, map_with_stmts.domain(), + obj_bigger_ok=True) # e.g., params might exist inames_out_dom_aligned = isl.align_spaces( - inames_out_dom, map_with_stmts.range()) + inames_out_dom, map_with_stmts.range(), + obj_bigger_ok=True) # e.g., params might exist map_with_stmts = map_with_stmts.intersect_range( inames_out_dom_aligned diff --git a/loopy/target/c/compyte 
b/loopy/target/c/compyte index 71bffa1ae..7e48e1166 160000 --- a/loopy/target/c/compyte +++ b/loopy/target/c/compyte @@ -1 +1 @@ -Subproject commit 71bffa1ae64ed98b9d922c79a6f9cc7eb4fd642f +Subproject commit 7e48e1166a13cfbb7b60f909b071f088034ffda1 From de7225d1b6b693554e9b6a8a64588f61d7ed05d0 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Thu, 10 Jun 2021 18:11:04 -0500 Subject: [PATCH 414/460] (temporary fix) if using v2-deps for linearization, don't error in append_barrier_or_raise_error() --- loopy/schedule/__init__.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/loopy/schedule/__init__.py b/loopy/schedule/__init__.py index 56bd0745a..3f28a4b3a 100644 --- a/loopy/schedule/__init__.py +++ b/loopy/schedule/__init__.py @@ -1950,7 +1950,8 @@ def insert_barriers_at_outer_level(schedule, reverse=False): elif isinstance(sched_item, RunInstruction): for dep in dep_tracker.gen_dependencies_with_target_at( sched_item.insn_id): - append_barrier_or_raise_error(result, dep, verify_only) + append_barrier_or_raise_error( + result, dep, verify_only, kernel.options.use_dependencies_v2) dep_tracker.discard_all_sources() break result.append(sched_item) From 0f09b36574a31d0a8da8c4cbaccc27e6f922dd15 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Thu, 10 Jun 2021 21:47:58 -0500 Subject: [PATCH 415/460] first attempt at handling dependencies during prefetch --- loopy/transform/precompute.py | 109 ++++++++++++++++++++++++++++++++++ 1 file changed, 109 insertions(+) diff --git a/loopy/transform/precompute.py b/loopy/transform/precompute.py index cefed807d..c45473d67 100644 --- a/loopy/transform/precompute.py +++ b/loopy/transform/precompute.py @@ -154,6 +154,9 @@ def __init__(self, rule_mapping_context, subst_name, subst_tag, within, self.compute_read_variables = compute_read_variables self.compute_insn_depends_on = set() + # TODO determine whether there's a better strategy for this + self.things_replaced = set() + def map_substitution(self, name, tag, arguments, 
expn_state): if not ( name == self.subst_name @@ -234,6 +237,9 @@ def map_kernel(self, kernel): insn.depends_on | frozenset([self.compute_dep_id]))) + if hasattr(insn, "id"): + self.things_replaced.add(insn.id) + for dep in insn.depends_on: if dep in excluded_insn_ids: continue @@ -1039,6 +1045,109 @@ def add_assumptions(d): from loopy.kernel.tools import assign_automatic_axes kernel = assign_automatic_axes(kernel) + + # {{{ update dependencies + + # Get some values that will be useful later + fetch_stmt_id = compute_insn_id + fetch_stmt = kernel.id_to_insn[compute_insn_id] + fetch_inames = fetch_stmt.within_inames + + # Go through all stmts that now use the fetch stuff + for usage_stmt_id in invr.things_replaced: + from loopy.schedule.checker.utils import ( + make_dep_map, + append_mark_to_strings, + remove_dim_by_name, + add_and_name_isl_dims, + insert_and_name_isl_dims, + ) + from loopy.schedule.checker.schedule import ( + BEFORE_MARK, + STATEMENT_VAR_NAME, + ) + # Get some values that will be useful later + usage_stmt = kernel.id_to_insn[usage_stmt_id] + usage_inames = usage_stmt.within_inames + shared_inames = fetch_inames & usage_inames + assert shared_inames == usage_stmt.within_inames - set(sweep_inames) + fetch_inames_not_shared = fetch_inames - shared_inames + + # {{{ create dep fetch_stmt->usage_stmt : SAME(shared_inames) + + dep_in_names = list(fetch_inames) # want a copy anyway + dep_in_names_marked = append_mark_to_strings(dep_in_names, BEFORE_MARK) + dep_out_names = usage_inames + + in_space_str = ", ".join(dep_in_names_marked) + out_space_str = ", ".join(dep_out_names) + constraint_str = " and ".join([ + "{0}{1} = {0}".format(iname, BEFORE_MARK) for iname in shared_inames]) + dep_usage_on_fetch = make_dep_map( + f"{{ [{in_space_str}] -> [{out_space_str}] : {constraint_str} }}", + knl_with_domains=kernel) + # (add this dep below after next step) + + # }}} + + from islpy import dim_type as dt + for dependee_id, old_deps in 
usage_stmt.dependencies.items(): + for old_dep in old_deps: + # {{{ create dep dependee->fetch_stmt + + new_dep = old_dep.copy() + + old_out_inames = old_dep.get_var_names(dt.out) + assert ( + set(old_out_inames) - set([STATEMENT_VAR_NAME, ]) == + set(usage_inames)) + + # Remove the sweep inames from out dims + for sweep_iname in sweep_inames: + new_dep = remove_dim_by_name(new_dep, dt.out, sweep_iname) + + # These new out inames will take on full domain values + + # Add new_unconstrained_out_names to out dims + new_dep = add_and_name_isl_dims( + new_dep, dt.out, fetch_inames_not_shared) + + # Intersect dom for fetch_inames_not_shared + dom_to_intersect = kernel.get_inames_domain( + fetch_inames_not_shared + ).project_out_except(fetch_inames_not_shared, [dt.set]) + + dom_to_intersect_aligned = isl.align_spaces( + dom_to_intersect, new_dep.range(), + obj_bigger_ok=True) # e.g., params might exist? + + new_dep = new_dep.intersect_range(dom_to_intersect_aligned) + + # {{{ Old dep might have been self-dep, set stmt var correctly + + # add and remove stmt dim + new_dep = remove_dim_by_name(new_dep, dt.out, STATEMENT_VAR_NAME) + new_dep = insert_and_name_isl_dims(new_dep, dt.out, [STATEMENT_VAR_NAME], 0) + # set stmt dim value + sid_out = 0 if fetch_stmt_id == dependee_id else 1 + new_dep = new_dep.add_constraint( + isl.Constraint.eq_from_names( + new_dep.space, + {1: sid_out, STATEMENT_VAR_NAME: -1})) + # }}} + + # Add this dep: dependee->fetch : dep + kernel = lp.add_dependency_v2( + kernel, fetch_stmt_id, dependee_id, new_dep) + + # }}} + + # Add other new dep from above: fetch->usage + kernel = lp.add_dependency_v2( + kernel, usage_stmt_id, fetch_stmt_id, dep_usage_on_fetch) + + # }}} + return kernel # vim: foldmethod=marker From 2f7c6bf8a6566a796d325a09cb15dc1e85b49b2a Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Thu, 10 Jun 2021 22:20:35 -0500 Subject: [PATCH 416/460] fix a couple bugs in precompute dep handling --- loopy/transform/precompute.py | 18 
++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/loopy/transform/precompute.py b/loopy/transform/precompute.py index c45473d67..3d21422fe 100644 --- a/loopy/transform/precompute.py +++ b/loopy/transform/precompute.py @@ -1045,7 +1045,6 @@ def add_assumptions(d): from loopy.kernel.tools import assign_automatic_axes kernel = assign_automatic_axes(kernel) - # {{{ update dependencies # Get some values that will be useful later @@ -1070,7 +1069,8 @@ def add_assumptions(d): usage_stmt = kernel.id_to_insn[usage_stmt_id] usage_inames = usage_stmt.within_inames shared_inames = fetch_inames & usage_inames - assert shared_inames == usage_stmt.within_inames - set(sweep_inames) + # TODO understand why this isn't true: + # assert shared_inames == usage_stmt.within_inames - set(sweep_inames) fetch_inames_not_shared = fetch_inames - shared_inames # {{{ create dep fetch_stmt->usage_stmt : SAME(shared_inames) @@ -1093,6 +1093,7 @@ def add_assumptions(d): from islpy import dim_type as dt for dependee_id, old_deps in usage_stmt.dependencies.items(): for old_dep in old_deps: + # old dep: dependee->usage_stmt # {{{ create dep dependee->fetch_stmt new_dep = old_dep.copy() @@ -1102,11 +1103,15 @@ def add_assumptions(d): set(old_out_inames) - set([STATEMENT_VAR_NAME, ]) == set(usage_inames)) - # Remove the sweep inames from out dims - for sweep_iname in sweep_inames: - new_dep = remove_dim_by_name(new_dep, dt.out, sweep_iname) + non_shared_inames = set(usage_inames) - shared_inames + # Remove the inames from old out dims that will not appear in new out dims + for non_shared_iname in non_shared_inames: + new_dep = remove_dim_by_name(new_dep, dt.out, non_shared_iname) # These new out inames will take on full domain values + assert ( + (set(usage_inames) - non_shared_inames) | fetch_inames_not_shared + == fetch_inames) # Add new_unconstrained_out_names to out dims new_dep = add_and_name_isl_dims( @@ -1127,7 +1132,8 @@ def add_assumptions(d): # add and remove 
stmt dim new_dep = remove_dim_by_name(new_dep, dt.out, STATEMENT_VAR_NAME) - new_dep = insert_and_name_isl_dims(new_dep, dt.out, [STATEMENT_VAR_NAME], 0) + new_dep = insert_and_name_isl_dims( + new_dep, dt.out, [STATEMENT_VAR_NAME], 0) # set stmt dim value sid_out = 0 if fetch_stmt_id == dependee_id else 1 new_dep = new_dep.add_constraint( From e307445d2bbf7bc9cc3c8d7e6cf9c61afe9e4c1a Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Thu, 10 Jun 2021 22:21:07 -0500 Subject: [PATCH 417/460] test dep handling in add_prefetch --- test/test_linearization_checker.py | 68 ++++++++++++++++++++++++++++++ 1 file changed, 68 insertions(+) diff --git a/test/test_linearization_checker.py b/test/test_linearization_checker.py index e3302846f..490a26f57 100644 --- a/test/test_linearization_checker.py +++ b/test/test_linearization_checker.py @@ -2494,6 +2494,74 @@ def test_map_domain_with_stencil_dependencies(): # }}} + +# {{{ test_add_prefetch_with_dependencies + +def test_add_prefetch_with_dependencies(): + + lp.set_caching_enabled(False) + knl = lp.make_kernel( + "[p] -> { [i,j,k,m] : 0 <= i,j < p and 0 <= k,m < 16}", + """ + for i,j,k,m + a[i+1,j+1,k+1,m+1] = a[i,j,k,m] {id=stmt} + end + """, + name="example", + assumptions="p >= 1", + lang_version=(2018, 2) + ) + knl = lp.add_and_infer_dtypes(knl, {"a": np.float32}) + + dep_init = make_dep_map( + "{ [i',j',k',m'] -> [i,j,k,m] : " + "i' + 1 = i and j' + 1 = j and k' + 1 = k and m' + 1 = m }", + self_dep=True, knl_with_domains=knl) + knl = lp.add_dependency_v2(knl, "stmt", "stmt", dep_init) + + # Compare deps and make sure they are satisfied + unsatisfied_deps = _compare_dependencies( + knl, + {"stmt": {"stmt": [dep_init, ]}}, + return_unsatisfied=True) + + assert not unsatisfied_deps + + knl = lp.add_prefetch( + knl, "a", sweep_inames=["k", "m"], + fetch_outer_inames=frozenset({"i", "j"}), + # dim_arg_names=["k_fetch", "m_fetch"], # TODO not sure why these don't work + ) + + # create expected deps + dep_stmt_on_fetch_exp = 
make_dep_map( + "{ [i',j',a_dim_2',a_dim_3'] -> [i,j,k,m] : " + "i' = i and j' = j }", + knl_with_domains=knl) + dep_fetch_on_stmt_exp = make_dep_map( + "{ [i',j',k',m'] -> [i,j,a_dim_2,a_dim_3] : " + "i' + 1 = i and j' + 1 = j " + "and 0 <= k',m' < 15 " + "}", + knl_with_domains=knl) + # (make_dep_map will set k',m' upper bound to 16, so add manually^) + + # Why is this necessary to avoid dependency cycle? + knl.id_to_insn["a_fetch_rule"].depends_on_is_final = True + + # Compare deps and make sure they are satisfied + unsatisfied_deps = _compare_dependencies( + knl, + { + "stmt": {"stmt": [dep_init], "a_fetch_rule": [dep_stmt_on_fetch_exp]}, + "a_fetch_rule": {"stmt": [dep_fetch_on_stmt_exp]}, + }, + return_unsatisfied=True) + + assert not unsatisfied_deps + +# }}} + # }}} # }}} From 236d24a9088bbff61481b7d9a99cc83f7ff99eaa Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Tue, 15 Jun 2021 03:17:53 -0500 Subject: [PATCH 418/460] (temporariliy) add non_linearizing_deps attribute to instruction (set of dependee ids to be ignored when creating cartoon dag) --- loopy/kernel/instruction.py | 23 +++++++++++++++++++++-- 1 file changed, 21 insertions(+), 2 deletions(-) diff --git a/loopy/kernel/instruction.py b/loopy/kernel/instruction.py index 6ce257d31..e3d76bd1f 100644 --- a/loopy/kernel/instruction.py +++ b/loopy/kernel/instruction.py @@ -222,6 +222,7 @@ class InstructionBase(ImmutableRecord, Taggable): def __init__(self, id, depends_on, depends_on_is_final, dependencies, + non_linearizing_deps, groups, conflicts_with_groups, no_sync_with, within_inames_is_final, within_inames, @@ -253,6 +254,9 @@ def __init__(self, id, depends_on, depends_on_is_final, if dependencies is None: dependencies = {} + # TODO dependee ids for deps that don't affect cartoon dag + if non_linearizing_deps is None: + non_linearizing_deps = set() if groups is None: groups = frozenset() @@ -311,6 +315,7 @@ def __init__(self, id, depends_on, depends_on_is_final, depends_on=depends_on, 
depends_on_is_final=depends_on_is_final, dependencies=dependencies, + non_linearizing_deps=non_linearizing_deps, # TODO no_sync_with=no_sync_with, groups=groups, conflicts_with_groups=conflicts_with_groups, within_inames_is_final=within_inames_is_final, @@ -405,6 +410,9 @@ def get_str_options(self): result.append("dep="+":".join(self.depends_on)) if self.dependencies: result.append("dependencies="+":".join(self.dependencies.keys())) + if self.non_linearizing_deps: + result.append( + "non_linearizing_deps="+":".join(self.non_linearizing_deps)) if self.no_sync_with: result.append("nosync="+":".join( "%s@%s" % entry for entry in self.no_sync_with)) @@ -475,6 +483,7 @@ def __setstate__(self, val): self.id = intern(self.id) self.depends_on = intern_frozenset_of_ids(self.depends_on) # TODO something with dependencies? + # TODO something with non_linearizing_deps? self.groups = intern_frozenset_of_ids(self.groups) self.conflicts_with_groups = ( intern_frozenset_of_ids(self.conflicts_with_groups)) @@ -883,6 +892,7 @@ def __init__(self, depends_on=None, depends_on_is_final=None, dependencies=None, + non_linearizing_deps=None, # TODO groups=None, conflicts_with_groups=None, no_sync_with=None, @@ -897,6 +907,7 @@ def __init__(self, depends_on=depends_on, depends_on_is_final=depends_on_is_final, dependencies=dependencies, + non_linearizing_deps=non_linearizing_deps, # TODO groups=groups, conflicts_with_groups=conflicts_with_groups, no_sync_with=no_sync_with, @@ -1033,6 +1044,7 @@ def __init__(self, depends_on=None, depends_on_is_final=None, dependencies=None, + non_linearizing_deps=None, # TODO groups=None, conflicts_with_groups=None, no_sync_with=None, @@ -1047,6 +1059,7 @@ def __init__(self, depends_on=depends_on, depends_on_is_final=depends_on_is_final, dependencies=dependencies, + non_linearizing_deps=non_linearizing_deps, # TODO groups=groups, conflicts_with_groups=conflicts_with_groups, no_sync_with=no_sync_with, @@ -1234,6 +1247,7 @@ def __init__(self, depends_on=None, 
depends_on_is_final=None, dependencies=None, + non_linearizing_deps=None, # TODO groups=None, conflicts_with_groups=None, no_sync_with=None, @@ -1257,6 +1271,7 @@ def __init__(self, depends_on=depends_on, depends_on_is_final=depends_on_is_final, dependencies=dependencies, + non_linearizing_deps=non_linearizing_deps, # TODO groups=groups, conflicts_with_groups=conflicts_with_groups, no_sync_with=no_sync_with, within_inames_is_final=within_inames_is_final, @@ -1402,7 +1417,8 @@ def __init__( id=None, depends_on=None, depends_on_is_final=None, - dependencies=None, + dependencies=None, # TODO + non_linearizing_deps=None, groups=None, conflicts_with_groups=None, no_sync_with=None, @@ -1416,6 +1432,7 @@ def __init__( depends_on=depends_on, depends_on_is_final=depends_on_is_final, dependencies=dependencies, + non_linearizing_deps=non_linearizing_deps, # TODO groups=groups, conflicts_with_groups=conflicts_with_groups, no_sync_with=no_sync_with, @@ -1470,7 +1487,8 @@ def __init__( id, depends_on=None, depends_on_is_final=None, - dependencies=None, + dependencies=None, # TODO + non_linearizing_deps=None, groups=None, conflicts_with_groups=None, no_sync_with=None, @@ -1490,6 +1508,7 @@ def __init__( depends_on=depends_on, depends_on_is_final=depends_on_is_final, dependencies=dependencies, + non_linearizing_deps=non_linearizing_deps, # TODO groups=groups, conflicts_with_groups=conflicts_with_groups, no_sync_with=no_sync_with, From 2fe181f151aa6ad8394e1a585876d31564115658 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Tue, 15 Jun 2021 03:19:23 -0500 Subject: [PATCH 419/460] when intersecting with SAME, ignore dependees in stmt.non_linearizing_deps --- loopy/schedule/checker/dependency.py | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/loopy/schedule/checker/dependency.py b/loopy/schedule/checker/dependency.py index 97b5321bd..47199a243 100644 --- a/loopy/schedule/checker/dependency.py +++ b/loopy/schedule/checker/dependency.py @@ -87,9 +87,18 @@ 
def filter_deps_by_intersection_with_SAME(knl): for dependee_id, dep_maps in stmt.dependencies.items(): + # Continue if we've been told to ignore this dependee + if stmt.non_linearizing_deps is None: + dependees_to_ignore = set() + else: + dependees_to_ignore = stmt.non_linearizing_deps + if dependee_id in dependees_to_ignore: + # TODO better fix for this...? + continue + # Continue if we already have this pair - if dependee_id in deps_filtered.keys() and ( - depender_id in deps_filtered[dependee_id]): + if depender_id in deps_filtered.keys() and ( + dependee_id in deps_filtered[depender_id]): continue for dep_map in dep_maps: From 6377454e08e6c5bc03b237616f646bfbb731e89c Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Wed, 30 Jun 2021 17:27:54 -0500 Subject: [PATCH 420/460] count globals pretending to be temporary variables --- loopy/statistics.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/loopy/statistics.py b/loopy/statistics.py index ef335abce..fbd44bdb3 100755 --- a/loopy/statistics.py +++ b/loopy/statistics.py @@ -1069,13 +1069,23 @@ def map_subscript(self, expr): except AttributeError: var_tags = frozenset() + is_temp = False if name in self.knl.arg_dict: array = self.knl.arg_dict[name] + elif name in self.knl.temporary_variables: + # this a temporary variable, but might have global address space + from loopy.kernel.data import AddressSpace + array = self.knl.temporary_variables[name] + if array.address_space != AddressSpace.GLOBAL: + # this is a temporary variable + return self.rec(expr.index) + # this is a temporary variable with global address space + is_temp = True else: # this is a temporary variable return self.rec(expr.index) - if not isinstance(array, lp.ArrayArg): + if (not is_temp) and not isinstance(array, lp.ArrayArg): # this array is not in global memory return self.rec(expr.index) From 20525dfc17c8feb1d60bb362172b584673507c36 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Wed, 14 Jul 2021 16:13:39 -0500 Subject: 
[PATCH 421/460] (callables update) call get_one_linearized_kernel on program[knl] --- test/test_linearization_checker.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/test/test_linearization_checker.py b/test/test_linearization_checker.py index 3c927a9ce..dff64764c 100644 --- a/test/test_linearization_checker.py +++ b/test/test_linearization_checker.py @@ -76,7 +76,6 @@ def test_lexschedule_creation(): e[t] = f[t] {id=insn_d, dep=insn_c} end """, - name="example", assumptions="pi,pj,pk,pt >= 1", ) knl = lp.add_and_infer_dtypes( @@ -87,7 +86,7 @@ def test_lexschedule_creation(): # get a linearization knl = preprocess_kernel(knl) - knl = get_one_linearized_kernel(knl) + knl = get_one_linearized_kernel(knl["loopy_kernel"], knl.callables_table) linearization_items = knl.linearization def _lex_space_string(dim_vals): From bec6dca187c286c41b0245e257314154055f3237 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Wed, 14 Jul 2021 16:23:12 -0500 Subject: [PATCH 422/460] change default arg from set to frozenset --- loopy/schedule/checker/schedule.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/loopy/schedule/checker/schedule.py b/loopy/schedule/checker/schedule.py index bc71df5d8..64dd377f2 100644 --- a/loopy/schedule/checker/schedule.py +++ b/loopy/schedule/checker/schedule.py @@ -109,7 +109,7 @@ def generate_pairwise_schedules( knl, linearization_items, insn_id_pairs, - loops_to_ignore=set(), + loops_to_ignore=frozenset(), ): r"""For each statement pair in a subset of all statement pairs found in a linearized kernel, determine the (relative) order in which the statement From b98a8de7db4f32f3ac2195f00d65cbbbf107147d Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Wed, 14 Jul 2021 16:27:21 -0500 Subject: [PATCH 423/460] (callables update) call get_one_linearized_kernel on program[knl] in doctest --- loopy/schedule/checker/__init__.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/loopy/schedule/checker/__init__.py 
b/loopy/schedule/checker/__init__.py index f9e9933c6..51100bc92 100644 --- a/loopy/schedule/checker/__init__.py +++ b/loopy/schedule/checker/__init__.py @@ -68,8 +68,11 @@ def get_schedules_for_statement_pairs( >>> knl = lp.add_and_infer_dtypes(knl, {"a": np.float32, "b": np.float32}) >>> knl = lp.prioritize_loops(knl, "i,j") >>> knl = lp.prioritize_loops(knl, "i,k") + >>> # Preprocess + >>> knl = lp.preprocess_kernel(knl) >>> # Get a linearization - >>> knl = lp.get_one_linearized_kernel(lp.preprocess_kernel(knl)) + >>> knl = lp.get_one_linearized_kernel( + ... knl["loopy_kernel"], knl.callables_table) >>> # Get a pairwise schedule ----------------------------------------------- >>> from loopy.schedule.checker import get_schedules_for_statement_pairs >>> # Get two maps ---------------------------------------------------------- From 0680d4dff50ea9e62a6d9689e46b6a6af10e3330 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Wed, 14 Jul 2021 16:57:06 -0500 Subject: [PATCH 424/460] change empty list default args to empty tuples --- test/test_linearization_checker.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/test_linearization_checker.py b/test/test_linearization_checker.py index 9a997e0b9..daa43cd95 100644 --- a/test/test_linearization_checker.py +++ b/test/test_linearization_checker.py @@ -65,7 +65,7 @@ def _align_and_compare_maps(maps): assert map1_aligned == map2 -def _lex_point_string(dim_vals, lid_inames=[], gid_inames=[]): +def _lex_point_string(dim_vals, lid_inames=(), gid_inames=()): # Return a string describing a point in a lex space # by assigning values to lex dimension variables # (used to create maps below) From 58556767314ca008adecdb79eb287ba269ad1c46 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Wed, 14 Jul 2021 17:16:09 -0500 Subject: [PATCH 425/460] rename IndexTag->InameTag --- loopy/schedule/checker/schedule.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/loopy/schedule/checker/schedule.py 
b/loopy/schedule/checker/schedule.py index efc0b8047..0d7d9aec6 100644 --- a/loopy/schedule/checker/schedule.py +++ b/loopy/schedule/checker/schedule.py @@ -239,7 +239,7 @@ def get_pairwise_statement_orderings_inner( """ from loopy.schedule import (EnterLoop, LeaveLoop, Barrier, RunInstruction) - from loopy.kernel.data import (LocalIndexTag, GroupIndexTag) + from loopy.kernel.data import (LocalInameTag, GroupInameTag) from loopy.schedule.checker.lexicographic_order_map import ( create_lex_order_map, get_statement_ordering_map, @@ -368,16 +368,16 @@ def get_pairwise_statement_orderings_inner( gid_lex_dim_names = set() par_iname_constraint_dicts = {} for iname in knl.all_inames(): - ltag = knl.iname_tags_of_type(iname, LocalIndexTag) + ltag = knl.iname_tags_of_type(iname, LocalInameTag) if ltag: assert len(ltag) == 1 # (should always be true) ltag_var = LTAG_VAR_NAMES[ltag.pop().axis] lid_lex_dim_names.add(ltag_var) par_iname_constraint_dicts[iname] = {1: 0, iname: 1, ltag_var: -1} - continue # Shouldn't be any GroupIndexTags + continue # Shouldn't be any GroupInameTags - gtag = knl.iname_tags_of_type(iname, GroupIndexTag) + gtag = knl.iname_tags_of_type(iname, GroupInameTag) if gtag: assert len(gtag) == 1 # (should always be true) gtag_var = GTAG_VAR_NAMES[gtag.pop().axis] From 31718ae0e9240e865c7d20cab563d81071850176 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Thu, 15 Jul 2021 16:50:37 -0500 Subject: [PATCH 426/460] after callables update, use knl[loopy_kernel] where needed --- test/test_linearization_checker.py | 23 ++++++++--------------- 1 file changed, 8 insertions(+), 15 deletions(-) diff --git a/test/test_linearization_checker.py b/test/test_linearization_checker.py index ea1cff04f..012a73458 100644 --- a/test/test_linearization_checker.py +++ b/test/test_linearization_checker.py @@ -1345,21 +1345,19 @@ def test_add_dependency_v2(): b[i] = a[i] {id=stmt_b, dep=stmt_a} c[i] = b[i] {id=stmt_c, dep=stmt_b} """, - name="example", - assumptions=assumptions_str, 
lang_version=(2018, 2) ) knl = lp.add_and_infer_dtypes( knl, {"a": np.float32, "b": np.float32, "c": np.float32}) - for stmt in knl.instructions: + for stmt in knl["loopy_kernel"].instructions: assert not stmt.dependencies # Add a dependency to stmt_b dep_b_on_a = make_dep_map( "[pi] -> {{ [i'] -> [i] : i > i' " "and {0} }}".format(assumptions_str), - self_dep=False, knl_with_domains=knl) + self_dep=False, knl_with_domains=knl["loopy_kernel"]) # test make_dep_map while we're here: dep_b_on_a_test = _isl_map_with_marked_dims( @@ -1374,7 +1372,7 @@ def test_add_dependency_v2(): knl = lp.add_dependency_v2(knl, "stmt_b", "stmt_a", dep_b_on_a) - for stmt in knl.instructions: + for stmt in knl["loopy_kernel"].instructions: if stmt.id == "stmt_b": assert stmt.dependencies == { "stmt_a": [dep_b_on_a, ], @@ -1386,7 +1384,7 @@ def test_add_dependency_v2(): dep_b_on_a_2 = make_dep_map( "[pi] -> {{ [i'] -> [i] : i = i' " "and {0}}}".format(assumptions_str), - self_dep=False, knl_with_domains=knl) + self_dep=False, knl_with_domains=knl["loopy_kernel"]) # test make_dep_map while we're here: dep_b_on_a_2_test = _isl_map_with_marked_dims( @@ -1401,7 +1399,7 @@ def test_add_dependency_v2(): knl = lp.add_dependency_v2(knl, "stmt_b", "stmt_a", dep_b_on_a_2) - for stmt in knl.instructions: + for stmt in knl["loopy_kernel"].instructions: if stmt.id == "stmt_b": assert stmt.dependencies == { "stmt_a": [dep_b_on_a, dep_b_on_a_2], @@ -1432,7 +1430,7 @@ def test_add_dependency_v2(): knl = lp.add_dependency_v2(knl, "stmt_c", "stmt_a", dep_c_on_a) knl = lp.add_dependency_v2(knl, "stmt_c", "stmt_b", dep_c_on_b) - for stmt in knl.instructions: + for stmt in knl["loopy_kernel"].instructions: if stmt.id == "stmt_b": assert stmt.dependencies == { "stmt_a": [dep_b_on_a, dep_b_on_a_2], @@ -1481,18 +1479,17 @@ def test_make_dep_map(): a[i,j] = 3.14 {id=stmt_a} b[k] = a[i,k] {id=stmt_b, dep=stmt_a} """, - name="example", lang_version=(2018, 2) ) knl = lp.add_and_infer_dtypes(knl, {"a,b": np.float32}) 
- for stmt in knl.instructions: + for stmt in knl["loopy_kernel"].instructions: assert not stmt.dependencies # Add a dependency to stmt_b dep_b_on_a = make_dep_map( "[n] -> { [i',j'] -> [i,k] : i > i' and j' < k}", - self_dep=False, knl_with_domains=knl) + self_dep=False, knl_with_domains=knl["loopy_kernel"]) # Create expected dep dep_b_on_a_test = _isl_map_with_marked_dims( @@ -1551,7 +1548,6 @@ def test_new_dependencies_finite_diff(): unsatisfied_deps = lp.find_unsatisfied_dependencies( proc_knl, lin_items) - print(lp.generate_code_v2(lin_knl).device_code()) assert not unsatisfied_deps # Make sure dep checking also works with just linearized kernel @@ -1572,7 +1568,6 @@ def test_new_dependencies_finite_diff(): unsatisfied_deps = lp.find_unsatisfied_dependencies( proc_knl, lin_items) - print(lp.generate_code_v2(lin_knl).device_code()) assert len(unsatisfied_deps) == 1 # }}} @@ -1589,7 +1584,6 @@ def test_new_dependencies_finite_diff(): # Without a barrier, deps not satisfied # Make sure there is no barrier, and that unsatisfied deps are caught from loopy.schedule import Barrier - print(lp.generate_code_v2(lin_knl).device_code()) for lin_item in lin_items: assert not isinstance(lin_item, Barrier) @@ -1616,7 +1610,6 @@ def test_new_dependencies_finite_diff(): # Make sure deps are satisfied lin_items, proc_knl, lin_knl = _process_and_linearize(knl) - print(lp.generate_code_v2(lin_knl).device_code()) unsatisfied_deps = lp.find_unsatisfied_dependencies( proc_knl, lin_items) From 150576a392da79fd0c4f1ae2227e92c3b6647243 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Thu, 15 Jul 2021 16:51:08 -0500 Subject: [PATCH 427/460] use @for_each_kernel with add_dependency_v2 --- loopy/transform/instruction.py | 1 + 1 file changed, 1 insertion(+) diff --git a/loopy/transform/instruction.py b/loopy/transform/instruction.py index 11b93c386..5b5934da4 100644 --- a/loopy/transform/instruction.py +++ b/loopy/transform/instruction.py @@ -146,6 +146,7 @@ def add_dep(insn): # {{{ 
add_dependency_v2 +@for_each_kernel def add_dependency_v2( kernel, stmt_id, depends_on_id, new_dependency): """Add the statement instance dependency `new_dependency` to the statement with From e55048c8c2317b64955f1acc7aa342f4a9af158c Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Thu, 15 Jul 2021 18:57:45 -0500 Subject: [PATCH 428/460] handle dependencies on barrier stmts by assigning them a lex point --- loopy/schedule/checker/schedule.py | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/loopy/schedule/checker/schedule.py b/loopy/schedule/checker/schedule.py index 0d7d9aec6..18f391c2b 100644 --- a/loopy/schedule/checker/schedule.py +++ b/loopy/schedule/checker/schedule.py @@ -532,6 +532,31 @@ def _gather_blex_ordering_info(sync_kind): if lin_item.synchronization_kind == sync_kind: next_blex_tuple[-1] += 1 + lp_stmt_id = lin_item.originating_insn_id + + if lp_stmt_id is None: + # Barriers without stmt ids were inserted as a result of a + # dependency. They don't themselves have dependencies. + # Don't map this barrier to a blex tuple. + continue + + # This barrier has a stmt id. + # If it was included in listed stmts, process it. + # Otherwise, there's nothing left to do (we've already + # incremented next_blex_tuple if necessary, and this barrier + # does not need to be assigned to a designated point in blex + # time) + if lp_stmt_id in all_stmt_ids: + # If sync scope matches, give this barrier its own point in + # lex time and update blex tuple after barrier. 
+ # Otherwise, add stmt->blex pair to stmt_inst_to_blex, but + # don't update the blex tuple (just like with any other + # stmt) + if lin_item.synchronization_kind == sync_kind: + stmt_inst_to_blex[lp_stmt_id] = tuple(next_blex_tuple) + next_blex_tuple[-1] += 1 + else: + stmt_inst_to_blex[lp_stmt_id] = tuple(next_blex_tuple) else: from loopy.schedule import (CallKernel, ReturnFromKernel) # No action needed for these types of linearization item From f91338d81fb764de5061ad64915d21fc7a81663f Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Thu, 15 Jul 2021 18:58:29 -0500 Subject: [PATCH 429/460] test pairwise SIOs where one of the statements is a barrier --- test/test_linearization_checker.py | 264 +++++++++++++++++++++++++++++ 1 file changed, 264 insertions(+) diff --git a/test/test_linearization_checker.py b/test/test_linearization_checker.py index daa43cd95..de88d98fc 100644 --- a/test/test_linearization_checker.py +++ b/test/test_linearization_checker.py @@ -725,6 +725,270 @@ def test_statement_instance_ordering_with_hw_par_tags(): # }}} +# {{{ test_statement_instance_ordering_of_barriers() + +def test_statement_instance_ordering_of_barriers(): + from loopy.schedule.checker import ( + get_pairwise_statement_orderings, + ) + from loopy.schedule.checker.utils import ( + partition_inames_by_concurrency, + ) + + # Example kernel + knl = lp.make_kernel( + [ + "{[i,ii]: 0<=i,iitemp = b[i,ii,j,jj] {id=stmt_a,dep=gbar} + ... lbarrier {id=lbar0,dep=stmt_a} + a[i,ii,j,jj] = temp + 1 {id=stmt_b,dep=lbar0} + ... 
lbarrier {id=lbar1,dep=stmt_b} + end + end + end + end + <>temp2 = 0.5 {id=stmt_c,dep=lbar1} + """, + assumptions="pi,pj >= 1", + lang_version=(2018, 2) + ) + knl = lp.add_and_infer_dtypes(knl, {"a,b": np.float32}) + knl = lp.tag_inames(knl, {"j": "l.0", "i": "g.0"}) + knl = lp.prioritize_loops(knl, "ii,jj") + + # Get a linearization + lin_items, proc_knl, lin_knl = _process_and_linearize(knl) + + # Get pairwise schedules + stmt_id_pairs = [ + ("stmt_a", "stmt_b"), + ("gbar", "stmt_a"), + ("stmt_b", "lbar1"), + ("lbar1", "stmt_c"), + ] + pworders = get_pairwise_statement_orderings( + lin_knl, + lin_items, + stmt_id_pairs, + ) + + # Create string for representing parallel iname SAME condition in sio + conc_inames, _ = partition_inames_by_concurrency(knl["loopy_kernel"]) + par_iname_condition = " and ".join( + "{0} = {0}'".format(iname) for iname in conc_inames) + + # {{{ Intra-thread relationship between stmt_a and stmt_b + + sio_intra_thread_exp = _isl_map_with_marked_dims( + "[pi, pj] -> {{ " + "[{0}'=0, i', ii', j', jj'] -> [{0}=1, i, ii, j, jj] : " + "0 <= i,ii,i',ii' < pi and 0 <= j,jj,j',jj' < pj " + "and (ii > ii' or (ii = ii' and jj >= jj')) " + "and {1} " + "}}".format( + STATEMENT_VAR_NAME, + par_iname_condition, + ) + ) + + _check_orderings_for_stmt_pair( + "stmt_a", "stmt_b", pworders, + sio_intra_thread_exp=sio_intra_thread_exp) + + # }}} + + # {{{ Relationship between gbar and stmt_a + + # intra-thread case + + sio_intra_thread_exp = _isl_map_with_marked_dims( + "[pi, pj] -> {{ " + "[{0}'=0, i', ii'] -> [{0}=1, i, ii, j, jj] : " + "0 <= i,ii,i',ii' < pi and 0 <= j,jj < pj " # domains + "and i = i' " # parallel inames must be same + "and ii >= ii' " # before->after condtion + "}}".format( + STATEMENT_VAR_NAME, + ) + ) + + # intra-group case + # TODO figure out what this should be + """ + sio_intra_group_exp = _isl_map_with_marked_dims( + "[pi, pj] -> {{ " + "[{0}'=0, i', ii'] -> [{0}=1, i, ii, j, jj] : " + "0 <= i,ii,i',ii' < pi and 0 <= j,jj < pj " # 
domains + "and i = i' " # GID inames must be same + "and (ii > ii' or (ii = ii' and jj = 0))" # before->after condtion + "}}".format( + STATEMENT_VAR_NAME, + ) + ) + """ + + # global case + + sio_global_exp = _isl_map_with_marked_dims( + "[pi, pj] -> {{ " + "[{0}'=0, i', ii'] -> [{0}=1, i, ii, j, jj] : " + "0 <= i,ii,i',ii' < pi and 0 <= j,jj < pj " # domains + "and ii >= ii' " # before->after condtion + "}}".format( + STATEMENT_VAR_NAME, + ) + ) + + _check_orderings_for_stmt_pair( + "gbar", "stmt_a", pworders, + sio_intra_thread_exp=sio_intra_thread_exp, + # sio_intra_group_exp=sio_intra_group_exp, + sio_global_exp=sio_global_exp) + + # }}} + + # {{{ Relationship between stmt_b and lbar1 + + # intra thread case + + sio_intra_thread_exp = _isl_map_with_marked_dims( + "[pi, pj] -> {{ " + "[{0}'=0, i', ii', j', jj'] -> [{0}=1, i, ii, j, jj] : " + "0 <= i,ii,i',ii' < pi and 0 <= j,jj,j',jj' < pj " # domains + "and i = i' and j = j'" # parallel inames must be same + "and (ii > ii' or (ii = ii' and jj >= jj'))" # before->after condtion + "}}".format( + STATEMENT_VAR_NAME, + ) + ) + + # intra-group case + + sio_intra_group_exp = _isl_map_with_marked_dims( + "[pi, pj] -> {{ " + "[{0}'=0, i', ii', j', jj'] -> [{0}=1, i, ii, j, jj] : " + "0 <= i,ii,i',ii' < pi and 0 <= j,jj,j',jj' < pj " # domains + "and i = i' " # GID parallel inames must be same + "and (ii > ii' or (ii = ii' and jj >= jj'))" # before->after condtion + "}}".format( + STATEMENT_VAR_NAME, + ) + ) + + # global case + + sio_global_exp = _isl_map_with_marked_dims( + "[pi, pj] -> {{ " + "[{0}'=0, i', ii', j', jj'] -> [{0}=1, i, ii, j, jj] : " + "0 <= i,ii,i',ii' < pi and 0 <= j,jj,j',jj' < pj " # domains + "and ii > ii'" # before->after condtion + "}}".format( + STATEMENT_VAR_NAME, + ) + ) + + _check_orderings_for_stmt_pair( + "stmt_b", "lbar1", pworders, + sio_intra_thread_exp=sio_intra_thread_exp, + sio_intra_group_exp=sio_intra_group_exp, + sio_global_exp=sio_global_exp, + ) + + # }}} + + # {{{ Relationship 
between stmt_a and stmt_b + + # intra thread case + + sio_intra_thread_exp = _isl_map_with_marked_dims( + "[pi, pj] -> {{ " + "[{0}'=0, i', ii', j', jj'] -> [{0}=1, i, ii, j, jj] : " + "0 <= i,ii,i',ii' < pi and 0 <= j,jj,j',jj' < pj " # domains + "and i = i' and j = j'" # parallel inames must be same + "and (ii > ii' or (ii = ii' and jj >= jj'))" # before->after condtion + "}}".format( + STATEMENT_VAR_NAME, + ) + ) + + # intra-group case + + sio_intra_group_exp = _isl_map_with_marked_dims( + "[pi, pj] -> {{ " + "[{0}'=0, i', ii', j', jj'] -> [{0}=1, i, ii, j, jj] : " + "0 <= i,ii,i',ii' < pi and 0 <= j,jj,j',jj' < pj " # domains + "and i = i' " # GID parallel inames must be same + "and (ii > ii' or (ii = ii' and jj >= jj'))" # before->after condtion + "}}".format( + STATEMENT_VAR_NAME, + ) + ) + + _check_orderings_for_stmt_pair( + "stmt_a", "stmt_b", pworders, + sio_intra_thread_exp=sio_intra_thread_exp, + sio_intra_group_exp=sio_intra_group_exp, + ) + + # }}} + + # {{{ Relationship between lbar1 and stmt_c + + # intra thread case + + sio_intra_thread_exp = _isl_map_with_marked_dims( + "[pi, pj] -> {{ " + "[{0}'=0, i', ii', j', jj'] -> [{0}=1] : " + "0 <= i',ii' < pi and 0 <= j',jj' < pj " # domains + "}}".format( + STATEMENT_VAR_NAME, + ) + ) + + # intra-group case + + sio_intra_group_exp = _isl_map_with_marked_dims( + "[pi, pj] -> {{ " + "[{0}'=0, i', ii', j', jj'] -> [{0}=1] : " + "0 <= i',ii' < pi and 0 <= j',jj' < pj " # domains + "}}".format( + STATEMENT_VAR_NAME, + ) + ) + + # global case + + # (only happens before if not last iteration of ii + sio_global_exp = _isl_map_with_marked_dims( + "[pi, pj] -> {{ " + "[{0}'=0, i', ii', j', jj'] -> [{0}=1] : " + "0 <= i',ii' < pi and 0 <= j',jj' < pj " # domains + "and ii' < pi-1" + "}}".format( + STATEMENT_VAR_NAME, + ) + ) + + _check_orderings_for_stmt_pair( + "lbar1", "stmt_c", pworders, + sio_intra_thread_exp=sio_intra_thread_exp, + sio_intra_group_exp=sio_intra_group_exp, + sio_global_exp=sio_global_exp, + ) + 
+ # }}} + +# }}} + + # {{{ test_sios_and_schedules_with_barriers() def test_sios_and_schedules_with_barriers(): From 814607ae29a66cdd3080e7e25ea9f7e8d766aeb7 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Sat, 17 Jul 2021 17:31:46 -0500 Subject: [PATCH 430/460] fix doc indentation --- loopy/schedule/checker/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/loopy/schedule/checker/utils.py b/loopy/schedule/checker/utils.py index 401fd477a..0698faf21 100644 --- a/loopy/schedule/checker/utils.py +++ b/loopy/schedule/checker/utils.py @@ -265,7 +265,7 @@ def get_EnterLoop_inames(linearization_items): def create_elementwise_comparison_conjunction_set( names0, names1, islvars, op="eq"): """Create a set constrained by the conjunction of conditions comparing - `names0` to `names1`. + `names0` to `names1`. :arg names0: A list of :class:`str` representing variable names. From f099b8f3f8006ecd25b371f6c57e0e3201cb45e6 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Sat, 17 Jul 2021 17:31:57 -0500 Subject: [PATCH 431/460] Update loopy/schedule/checker/schedule.py MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit fix doc indentation Co-authored-by: Andreas Klöckner --- loopy/schedule/checker/schedule.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/loopy/schedule/checker/schedule.py b/loopy/schedule/checker/schedule.py index 18f391c2b..3529dd48d 100644 --- a/loopy/schedule/checker/schedule.py +++ b/loopy/schedule/checker/schedule.py @@ -151,7 +151,7 @@ def _simplify_lex_dims(tup0, tup1): class SpecialLexPointWRTLoop: """Strings identifying a particular point or set of points in a - lexicographic ordering of statements, specified relative to a loop. + lexicographic ordering of statements, specified relative to a loop. .. 
attribute:: PRE A :class:`str` indicating the last lexicographic point that From 7d34906dcc70f0bda26c754dd9e70af989f8cabc Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Sat, 17 Jul 2021 17:34:34 -0500 Subject: [PATCH 432/460] Update loopy/schedule/checker/__init__.py MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit more generic terminology in doc string Co-authored-by: Andreas Klöckner --- loopy/schedule/checker/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/loopy/schedule/checker/__init__.py b/loopy/schedule/checker/__init__.py index 4ca15fe7f..529573860 100644 --- a/loopy/schedule/checker/__init__.py +++ b/loopy/schedule/checker/__init__.py @@ -44,7 +44,7 @@ def get_pairwise_statement_orderings( this routine during linearization, a truncated (i.e. partial) linearization may be passed through this argument. - :arg stmt_id_pairs: A list containing pairs of statement identifiers. + :arg stmt_id_pairs: A sequence containing pairs of statement identifiers. :returns: A dictionary mapping each two-tuple of statement identifiers provided in `stmt_id_pairs` to a :class:`collections.namedtuple` From 4092ae66a8ad539896f5f3b010da0d8ad3b00409 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Sat, 17 Jul 2021 17:53:57 -0500 Subject: [PATCH 433/460] revise docstring for StatementOrdering --- loopy/schedule/checker/schedule.py | 31 +++++++++++++++++++----------- 1 file changed, 20 insertions(+), 11 deletions(-) diff --git a/loopy/schedule/checker/schedule.py b/loopy/schedule/checker/schedule.py index 3529dd48d..e91cd6ac8 100644 --- a/loopy/schedule/checker/schedule.py +++ b/loopy/schedule/checker/schedule.py @@ -224,18 +224,14 @@ def get_pairwise_statement_orderings_inner( access tags. 
:returns: A dictionary mapping each two-tuple of statement identifiers - provided in `stmt_id_pairs` to a :class:`collections.namedtuple` + provided in `stmt_id_pairs` to a :class:`StatementOrdering` containing the intra-thread SIO (`sio_intra_thread`), intra-group SIO - (`sio_intra_group`), and global SIO (`sio_global`), each realized - as an :class:`islpy.Map` from each instance of the first - statement to all instances of the second statement that occur later, - as well as the intra-thread pairwise schedule (`pwsched_intra_thread`), - intra-group pairwise schedule (`pwsched_intra_group`), and the global - pairwise schedule (`pwsched_global`), each containing a pair of - mappings from statement instances to points in a lexicographic - ordering, one for each statement. Note that a pairwise schedule - alone cannot be used to reproduce the corresponding SIO without the - corresponding (unique) lexicographic order map, which is not returned. + (`sio_intra_group`), global SIO (`sio_global`), intra-thread pairwise + schedule (`pwsched_intra_thread`), intra-group pairwise schedule + (`pwsched_intra_group`), and the global pairwise schedule + (`pwsched_global`). Note that a pairwise schedule alone cannot be used + to reproduce the corresponding SIO without the corresponding + lexicographic order map, which is not returned. """ from loopy.schedule import (EnterLoop, LeaveLoop, Barrier, RunInstruction) @@ -827,6 +823,19 @@ def _get_map_for_stmt( # }}} pairwise_sios = {} + + """Create :class:`StatementOrdering` containing the + intra-thread SIO (`sio_intra_thread`), + intra-group SIO (`sio_intra_group`), + global SIO (`sio_global`), + intra-thread pairwise schedule (`pwsched_intra_thread`), + intra-group pairwise schedule (`pwsched_intra_group`), + and the global pairwise schedule (`pwsched_global`), + Each SIO is realized as an :class:`islpy.Map` from each instance of the + first statement to all instances of the second statement that occur later. 
+ Each pairwise schedule contains a pair of mappings from statement + instances to points in a lexicographic ordering, one for each statement. + """ from collections import namedtuple StatementOrdering = namedtuple( "StatementOrdering", From 4196adbf24957415f44922edd1585a9f83087a37 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Sat, 17 Jul 2021 19:08:14 -0500 Subject: [PATCH 434/460] promote StatementOrdering to top-level dataclass --- loopy/schedule/checker/schedule.py | 85 +++++++++++++++++------------- 1 file changed, 49 insertions(+), 36 deletions(-) diff --git a/loopy/schedule/checker/schedule.py b/loopy/schedule/checker/schedule.py index e91cd6ac8..02ff14ae5 100644 --- a/loopy/schedule/checker/schedule.py +++ b/loopy/schedule/checker/schedule.py @@ -21,6 +21,7 @@ """ import islpy as isl +from dataclasses import dataclass dt = isl.dim_type.set @@ -154,28 +155,28 @@ class SpecialLexPointWRTLoop: lexicographic ordering of statements, specified relative to a loop. .. attribute:: PRE - A :class:`str` indicating the last lexicographic point that - precedes the loop. + A :class:`str` indicating the last lexicographic point that + precedes the loop. .. attribute:: FIRST - A :class:`str` indicating the first lexicographic point in the - first loop iteration (i.e., with the iname set to its min. val). + A :class:`str` indicating the first lexicographic point in the + first loop iteration (i.e., with the iname set to its min. val). .. attribute:: TOP - A :class:`str` indicating the first lexicographic point in - an arbitrary loop iteration. + A :class:`str` indicating the first lexicographic point in + an arbitrary loop iteration. .. attribute:: BOTTOM - A :class:`str` indicating the last lexicographic point in - an arbitrary loop iteration. + A :class:`str` indicating the last lexicographic point in + an arbitrary loop iteration. .. 
attribute:: LAST - A :class:`str` indicating the last lexicographic point in the - last loop iteration (i.e., with the iname set to its max val). + A :class:`str` indicating the last lexicographic point in the + last loop iteration (i.e., with the iname set to its max val). .. attribute:: POST - A :class:`str` indicating the first lexicographic point that - follows the loop. + A :class:`str` indicating the first lexicographic point that + follows the loop. """ PRE = "pre" @@ -188,6 +189,37 @@ class SpecialLexPointWRTLoop: # }}} +# {{{ class StatementOrdering + +@dataclass +class StatementOrdering: + r"""A container for mappings used to describe the ordering of statement + instances for a pair of statements. These include the intra-thread SIO + (`sio_intra_thread`), intra-group SIO (`sio_intra_group`), and global SIO + (`sio_global`), each realized as an :class:`islpy.Map` from each instance + of the first statement to all instances of the second statement that occur + later. + + Also included (mostly for testing and debugging) are the + intra-thread pairwise schedule (`pwsched_intra_thread`), intra-group + pairwise schedule (`pwsched_intra_group`), and global pairwise schedule + (`pwsched_global`), each containing a pair of mappings from statement + instances to points in a lexicographic ordering, one for each statement. + Each SIO is created by composing the two mappings in the corresponding + pairwise schedule with an associated mapping defining the ordering of + points in the lexicographical space (not included). 
+ """ + + sio_intra_thread: isl.Map + sio_intra_group: isl.Map + sio_global: isl.Map + pwsched_intra_thread: tuple + pwsched_intra_group: tuple + pwsched_global: tuple + +# }}} + + # {{{ get_pairwise_statement_orderings_inner def get_pairwise_statement_orderings_inner( @@ -824,28 +856,6 @@ def _get_map_for_stmt( pairwise_sios = {} - """Create :class:`StatementOrdering` containing the - intra-thread SIO (`sio_intra_thread`), - intra-group SIO (`sio_intra_group`), - global SIO (`sio_global`), - intra-thread pairwise schedule (`pwsched_intra_thread`), - intra-group pairwise schedule (`pwsched_intra_group`), - and the global pairwise schedule (`pwsched_global`), - Each SIO is realized as an :class:`islpy.Map` from each instance of the - first statement to all instances of the second statement that occur later. - Each pairwise schedule contains a pair of mappings from statement - instances to points in a lexicographic ordering, one for each statement. - """ - from collections import namedtuple - StatementOrdering = namedtuple( - "StatementOrdering", - [ - "sio_intra_thread", "pwsched_intra_thread", - "sio_intra_group", "pwsched_intra_group", - "sio_global", "pwsched_global", - ]) - # ("sio" = statement instance ordering; "pwsched" = pairwise schedule) - for stmt_ids in stmt_id_pairs: # Determine integer IDs that will represent each statement in mapping # (dependency map creation assumes sid_before=0 and sid_after=1, unless @@ -928,6 +938,9 @@ def _get_sched_maps_and_sio( in zip(stmt_ids, blex_tuples_padded, int_sids) ] + # Note that for the intra-group case, we already constrained GID + # 'before' to equal GID 'after' earlier in _gather_blex_ordering_info() + # Create statement instance ordering sio_par = get_statement_ordering_map( *par_sched_maps, # note, func accepts exactly two maps @@ -947,10 +960,10 @@ def _get_sched_maps_and_sio( # Store sched maps along with SIOs pairwise_sios[tuple(stmt_ids)] = StatementOrdering( sio_intra_thread=sio_intra_thread, - 
pwsched_intra_thread=tuple(intra_thread_sched_maps), sio_intra_group=sio_intra_group, - pwsched_intra_group=tuple(pwsched_intra_group), sio_global=sio_global, + pwsched_intra_thread=tuple(intra_thread_sched_maps), + pwsched_intra_group=tuple(pwsched_intra_group), pwsched_global=tuple(pwsched_global), ) From 4e847a5c7581aafae126d74ca80328f2d02d1383 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Sat, 17 Jul 2021 19:10:37 -0500 Subject: [PATCH 435/460] add dataclass to install_requires in setup.py --- setup.py | 1 + 1 file changed, 1 insertion(+) diff --git a/setup.py b/setup.py index 2e907c1b9..2f04d2203 100644 --- a/setup.py +++ b/setup.py @@ -90,6 +90,7 @@ def write_git_revision(package_name): # https://github.com/inducer/loopy/pull/419 "numpy>=1.19", + "dataclasses>=0.7;python_version<='3.6'" "cgen>=2016.1", "islpy>=2019.1", From 9ecd66d4e5e4129dd6e8b2c8f713eb2221beae1b Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Sat, 17 Jul 2021 19:41:37 -0500 Subject: [PATCH 436/460] fix docstring indentation --- loopy/schedule/checker/lexicographic_order_map.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/loopy/schedule/checker/lexicographic_order_map.py b/loopy/schedule/checker/lexicographic_order_map.py index 0a01f888c..896235b82 100644 --- a/loopy/schedule/checker/lexicographic_order_map.py +++ b/loopy/schedule/checker/lexicographic_order_map.py @@ -27,7 +27,7 @@ def get_statement_ordering_map( sched_before, sched_after, lex_map, before_mark): """Return a statement ordering represented as a map from each statement - instance to all statement instances occurring later. + instance to all statement instances occurring later. 
:arg sched_before: An :class:`islpy.Map` representing a schedule as a mapping from statement instances (for one particular statement) @@ -77,9 +77,9 @@ def get_lex_order_set( islvars=None, ): """Return an :class:`islpy.Set` representing a lexicographic ordering - over a space with the number of dimensions provided in `dim_names` - (the set itself will have twice this many dimensions in order to - represent the ordering as before-after pairs of points). + over a space with the number of dimensions provided in `dim_names` + (the set itself will have twice this many dimensions in order to + represent the ordering as before-after pairs of points). :arg dim_names: A list of :class:`str` variable names to be used to describe lexicographic space dimensions for a point in a lexicographic @@ -160,7 +160,7 @@ def create_lex_order_map( in_dim_mark, ): """Return a map from each point in a lexicographic ordering to every - point that occurs later in the lexicographic ordering. + point that occurs later in the lexicographic ordering. :arg dim_names: A list of :class:`str` variable names for the lexicographic space dimensions. From 7c4785a36352d3e1abaffc80a7cfe94a22de0bbb Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Sat, 17 Jul 2021 19:42:47 -0500 Subject: [PATCH 437/460] reorg and improve docstrings about SIOs a bit --- loopy/schedule/checker/__init__.py | 37 ++++++++++++++------------- loopy/schedule/checker/schedule.py | 40 ++++++++++++++++-------------- 2 files changed, 42 insertions(+), 35 deletions(-) diff --git a/loopy/schedule/checker/__init__.py b/loopy/schedule/checker/__init__.py index 529573860..d24b7a2ea 100644 --- a/loopy/schedule/checker/__init__.py +++ b/loopy/schedule/checker/__init__.py @@ -30,10 +30,23 @@ def get_pairwise_statement_orderings( ): r"""For each statement pair in a subset of all statement pairs found in a linearized kernel, determine the (relative) order in which the statement - instances are executed. 
For each pair, represent this relative ordering as - a ``statement instance ordering`` (SIO): a map from each instance of the - first statement to all instances of the second statement that occur - later. + instances are executed. For each pair, represent this relative ordering + using three ``statement instance orderings`` (SIOs): + + - The intra-thread SIO: A :class:`islpy.Map` from each instance of the + first statement to all instances of the second statement that occur + later, such that both statement instances in each before-after pair are + executed within the same work-item (thread). + + - The intra-group SIO: A :class:`islpy.Map` from each instance of the first + statement to all instances of the second statement that occur later, such + that both statement instances in each before-after pair are executed + within the same work-group (though potentially by different work-items). + + - The global SIO: A :class:`islpy.Map` from each instance of the first + statement to all instances of the second statement that occur later, even + if the two statement instances in a given before-after pair are executed + within different work-groups. :arg knl: A preprocessed :class:`loopy.kernel.LoopKernel` containing the linearization items that will be used to create the SIOs. @@ -47,18 +60,8 @@ def get_pairwise_statement_orderings( :arg stmt_id_pairs: A sequence containing pairs of statement identifiers. 
:returns: A dictionary mapping each two-tuple of statement identifiers - provided in `stmt_id_pairs` to a :class:`collections.namedtuple` - containing the intra-thread SIO (`sio_intra_thread`), intra-group SIO - (`sio_intra_group`), and global SIO (`sio_global`), each realized - as an :class:`islpy.Map` from each instance of the first - statement to all instances of the second statement that occur later, - as well as the intra-thread pairwise schedule (`pwsched_intra_thread`), - intra-group pairwise schedule (`pwsched_intra_group`), and the global - pairwise schedule (`pwsched_global`), each containing a pair of - mappings from statement instances to points in a lexicographic - ordering, one for each statement. Note that a pairwise schedule - alone cannot be used to reproduce the corresponding SIO without the - corresponding (unique) lexicographic order map, which is not returned. + provided in `stmt_id_pairs` to a :class:`StatementOrdering`, which + contains the three SIOs described above. .. doctest: @@ -124,7 +127,7 @@ def get_pairwise_statement_orderings( # }}} - # {{{ Create two mappings from {statement instance: lex point} + # {{{ Create the SIOs from loopy.schedule.checker.schedule import ( get_pairwise_statement_orderings_inner diff --git a/loopy/schedule/checker/schedule.py b/loopy/schedule/checker/schedule.py index 02ff14ae5..85e1c8429 100644 --- a/loopy/schedule/checker/schedule.py +++ b/loopy/schedule/checker/schedule.py @@ -193,12 +193,9 @@ class SpecialLexPointWRTLoop: @dataclass class StatementOrdering: - r"""A container for mappings used to describe the ordering of statement - instances for a pair of statements. These include the intra-thread SIO - (`sio_intra_thread`), intra-group SIO (`sio_intra_group`), and global SIO - (`sio_global`), each realized as an :class:`islpy.Map` from each instance - of the first statement to all instances of the second statement that occur - later. 
+ r"""A container for the three statement instance orderings (described + below) used to formalize the ordering of statement instances for a pair of + statements. Also included (mostly for testing and debugging) are the intra-thread pairwise schedule (`pwsched_intra_thread`), intra-group @@ -230,10 +227,23 @@ def get_pairwise_statement_orderings_inner( ): r"""For each statement pair in a subset of all statement pairs found in a linearized kernel, determine the (relative) order in which the statement - instances are executed. For each pair, represent this relative ordering as - a ``statement instance ordering`` (SIO): a map from each instance of the - first statement to all instances of the second statement that occur - later. + instances are executed. For each pair, represent this relative ordering + using three ``statement instance orderings`` (SIOs): + + - The intra-thread SIO: A :class:`islpy.Map` from each instance of the + first statement to all instances of the second statement that occur + later, such that both statement instances in each before-after pair are + executed within the same work-item (thread). + + - The intra-group SIO: A :class:`islpy.Map` from each instance of the first + statement to all instances of the second statement that occur later, such + that both statement instances in each before-after pair are executed + within the same work-group (though potentially by different work-items). + + - The global SIO: A :class:`islpy.Map` from each instance of the first + statement to all instances of the second statement that occur later, even + if the two statement instances in a given before-after pair are executed + within different work-groups. :arg knl: A preprocessed :class:`loopy.kernel.LoopKernel` containing the linearization items that will be used to create the SIOs. This @@ -256,14 +266,8 @@ def get_pairwise_statement_orderings_inner( access tags. 
:returns: A dictionary mapping each two-tuple of statement identifiers - provided in `stmt_id_pairs` to a :class:`StatementOrdering` - containing the intra-thread SIO (`sio_intra_thread`), intra-group SIO - (`sio_intra_group`), global SIO (`sio_global`), intra-thread pairwise - schedule (`pwsched_intra_thread`), intra-group pairwise schedule - (`pwsched_intra_group`), and the global pairwise schedule - (`pwsched_global`). Note that a pairwise schedule alone cannot be used - to reproduce the corresponding SIO without the corresponding - lexicographic order map, which is not returned. + provided in `stmt_id_pairs` to a :class:`StatementOrdering`, which + contains the three SIOs described above. """ from loopy.schedule import (EnterLoop, LeaveLoop, Barrier, RunInstruction) From 7614ab88b9d1b0a81ee07a8168f589f3a3d98f67 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Sat, 17 Jul 2021 20:05:01 -0500 Subject: [PATCH 438/460] consistent naming of funcs --- loopy/schedule/checker/lexicographic_order_map.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/loopy/schedule/checker/lexicographic_order_map.py b/loopy/schedule/checker/lexicographic_order_map.py index 896235b82..f0ae65f98 100644 --- a/loopy/schedule/checker/lexicographic_order_map.py +++ b/loopy/schedule/checker/lexicographic_order_map.py @@ -71,7 +71,7 @@ def get_statement_ordering_map( sio, isl.dim_type.in_, before_mark) -def get_lex_order_set( +def _create_lex_order_set( dim_names, in_dim_mark, islvars=None, @@ -186,7 +186,7 @@ def create_lex_order_map( dim_type = isl.dim_type # First, get a set representing the lexicographic ordering. 
- lex_order_set = get_lex_order_set( + lex_order_set = _create_lex_order_set( dim_names, in_dim_mark=in_dim_mark, ) From 9963c9b73944fee1c278a85bd79af7ad604e184c Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Sat, 17 Jul 2021 20:10:14 -0500 Subject: [PATCH 439/460] docstring grammar typo --- loopy/schedule/checker/lexicographic_order_map.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/loopy/schedule/checker/lexicographic_order_map.py b/loopy/schedule/checker/lexicographic_order_map.py index f0ae65f98..aac4ac8c7 100644 --- a/loopy/schedule/checker/lexicographic_order_map.py +++ b/loopy/schedule/checker/lexicographic_order_map.py @@ -92,7 +92,7 @@ def _create_lex_order_set( :arg islvars: A dictionary mapping variable names in `dim_names` to :class:`islpy.PwAff` instances that represent each of the variables (islvars may be produced by `islpy.make_zero_and_vars`). - The key '0' is also include and represents a :class:`islpy.PwAff` zero + The key '0' is also included and represents a :class:`islpy.PwAff` zero constant. This dictionary defines the space to be used for the set and must also include versions of `dim_names` with the `in_dim_mark` appended. 
If no value is passed, the dictionary will be made using From 9668335ff93ccf92df09accb93b797c303b20e9b Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Mon, 19 Jul 2021 12:40:16 -0500 Subject: [PATCH 440/460] fix typo in setup.py --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 2f04d2203..701f796d5 100644 --- a/setup.py +++ b/setup.py @@ -90,7 +90,7 @@ def write_git_revision(package_name): # https://github.com/inducer/loopy/pull/419 "numpy>=1.19", - "dataclasses>=0.7;python_version<='3.6'" + "dataclasses>=0.7;python_version<='3.6'", "cgen>=2016.1", "islpy>=2019.1", From 74b3f4bfb21c4f07f401bd1b07bcc0d0d2d89222 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Mon, 19 Jul 2021 12:50:56 -0500 Subject: [PATCH 441/460] rename islvars->var_name_to_pwaff --- .../checker/lexicographic_order_map.py | 25 +++++----- loopy/schedule/checker/utils.py | 48 +++++++++---------- 2 files changed, 38 insertions(+), 35 deletions(-) diff --git a/loopy/schedule/checker/lexicographic_order_map.py b/loopy/schedule/checker/lexicographic_order_map.py index aac4ac8c7..5821202cb 100644 --- a/loopy/schedule/checker/lexicographic_order_map.py +++ b/loopy/schedule/checker/lexicographic_order_map.py @@ -74,7 +74,7 @@ def get_statement_ordering_map( def _create_lex_order_set( dim_names, in_dim_mark, - islvars=None, + var_name_to_pwaff=None, ): """Return an :class:`islpy.Set` representing a lexicographic ordering over a space with the number of dimensions provided in `dim_names` @@ -89,9 +89,9 @@ def _create_lex_order_set( distinguish corresponding dimensions in before-after pairs of points. (see example below) - :arg islvars: A dictionary mapping variable names in `dim_names` to + :arg var_name_to_pwaff: A dictionary mapping variable names in `dim_names` to :class:`islpy.PwAff` instances that represent each of the variables - (islvars may be produced by `islpy.make_zero_and_vars`). + (var_name_to_pwaff may be produced by `islpy.make_zero_and_vars`). 
The key '0' is also included and represents a :class:`islpy.PwAff` zero constant. This dictionary defines the space to be used for the set and must also include versions of `dim_names` with the `in_dim_mark` @@ -121,33 +121,36 @@ def _create_lex_order_set( in_dim_names = append_mark_to_strings(dim_names, mark=in_dim_mark) - # If no islvars passed, make them using the names provided + # If no var_name_to_pwaff passed, make them using the names provided # (make sure to pass var names in desired order of space dims) - if islvars is None: - islvars = isl.make_zero_and_vars( + if var_name_to_pwaff is None: + var_name_to_pwaff = isl.make_zero_and_vars( in_dim_names+dim_names, []) # Initialize set with constraint i0' < i0 - lex_order_set = islvars[in_dim_names[0]].lt_set(islvars[dim_names[0]]) + lex_order_set = var_name_to_pwaff[in_dim_names[0]].lt_set( + var_name_to_pwaff[dim_names[0]]) # For each dim d, starting with d=1, equality_conj_set will be constrained # by d equalities, e.g., (i0' = i0 and i1' = i1 and ... i(d-1)' = i(d-1)). - equality_conj_set = islvars[0].eq_set(islvars[0]) # initialize to 'true' + equality_conj_set = var_name_to_pwaff[0].eq_set( + var_name_to_pwaff[0]) # initialize to 'true' for i in range(1, len(in_dim_names)): # Add the next equality constraint to equality_conj_set equality_conj_set = equality_conj_set & \ - islvars[in_dim_names[i-1]].eq_set(islvars[dim_names[i-1]]) + var_name_to_pwaff[in_dim_names[i-1]].eq_set( + var_name_to_pwaff[dim_names[i-1]]) # Create a set constrained by adding a less-than constraint for this dim, # e.g., (i1' < i1), to the current equality conjunction set. # For each dim d, starting with d=1, this full conjunction will have # d equalities and one inequality, e.g., # (i0' = i0 and i1' = i1 and ... 
i(d-1)' = i(d-1) and id' < id) - full_conj_set = islvars[in_dim_names[i]].lt_set( - islvars[dim_names[i]]) & equality_conj_set + full_conj_set = var_name_to_pwaff[in_dim_names[i]].lt_set( + var_name_to_pwaff[dim_names[i]]) & equality_conj_set # Union this new constraint with the current lex_order_set lex_order_set = lex_order_set | full_conj_set diff --git a/loopy/schedule/checker/utils.py b/loopy/schedule/checker/utils.py index 0698faf21..94f2fbd0c 100644 --- a/loopy/schedule/checker/utils.py +++ b/loopy/schedule/checker/utils.py @@ -175,13 +175,24 @@ def create_symbolic_map_from_tuples( on these values. """ - # TODO allow None for domains + # FIXME allow None for domains space_out_names = space.get_var_names(dt.out) space_in_names = space.get_var_names(dt.in_) + def _conjunction_of_dim_eq_conditions(dim_names, values, var_name_to_pwaff): + condition = var_name_to_pwaff[0].eq_set(var_name_to_pwaff[0]) + for dim_name, val in zip(dim_names, values): + if isinstance(val, int): + condition = condition \ + & var_name_to_pwaff[dim_name].eq_set(var_name_to_pwaff[0]+val) + else: + condition = condition \ + & var_name_to_pwaff[dim_name].eq_set(var_name_to_pwaff[val]) + return condition + # Get islvars from space - islvars = isl.affs_from_space( + var_name_to_pwaff = isl.affs_from_space( space.move_dims( dt.out, 0, dt.in_, 0, @@ -189,20 +200,9 @@ def create_symbolic_map_from_tuples( ).range() ) - def _conjunction_of_dim_eq_conditions(dim_names, values, islvars): - condition = islvars[0].eq_set(islvars[0]) - for dim_name, val in zip(dim_names, values): - if isinstance(val, int): - condition = condition \ - & islvars[dim_name].eq_set(islvars[0]+val) - else: - condition = condition \ - & islvars[dim_name].eq_set(islvars[val]) - return condition - # Initialize union of maps to empty union_of_maps = isl.Map.from_domain( - islvars[0].eq_set(islvars[0]+1) # 0 == 1 (false) + var_name_to_pwaff[0].eq_set(var_name_to_pwaff[0]+1) # 0 == 1 (false) ).move_dims( dt.out, 0, dt.in_, 
len(space_in_names), len(space_out_names)) @@ -211,11 +211,11 @@ def _conjunction_of_dim_eq_conditions(dim_names, values, islvars): # Set values for 'in' dimension using tuple vals condition = _conjunction_of_dim_eq_conditions( - space_in_names, tup_in, islvars) + space_in_names, tup_in, var_name_to_pwaff) # Set values for 'out' dimension using tuple vals condition = condition & _conjunction_of_dim_eq_conditions( - space_out_names, tup_out, islvars) + space_out_names, tup_out, var_name_to_pwaff) # Convert set to map by moving dimensions around map_from_set = isl.Map.from_domain(condition) @@ -263,7 +263,7 @@ def get_EnterLoop_inames(linearization_items): def create_elementwise_comparison_conjunction_set( - names0, names1, islvars, op="eq"): + names0, names1, var_name_to_pwaff, op="eq"): """Create a set constrained by the conjunction of conditions comparing `names0` to `names1`. @@ -271,27 +271,27 @@ def create_elementwise_comparison_conjunction_set( :arg names1: A list of :class:`str` representing variable names. - :arg islvars: A dictionary from variable names to :class:`islpy.PwAff` + :arg var_name_to_pwaff: A dictionary from variable names to :class:`islpy.PwAff` instances that represent each of the variables - (islvars may be produced by `islpy.make_zero_and_vars`). The key + (var_name_to_pwaff may be produced by `islpy.make_zero_and_vars`). The key '0' is also include and represents a :class:`islpy.PwAff` zero constant. :arg op: A :class:`str` describing the operator to use when creating the set constraints. Options: `eq` for `=`, `lt` for `<` - :returns: A set involving `islvars` cosntrained by the constraints + :returns: A set involving `var_name_to_pwaff` cosntrained by the constraints `{names0[0] names1[0] and names0[1] names1[1] and ...}`. 
""" # initialize set with constraint that is always true - conj_set = islvars[0].eq_set(islvars[0]) + conj_set = var_name_to_pwaff[0].eq_set(var_name_to_pwaff[0]) for n0, n1 in zip(names0, names1): if op == "eq": - conj_set = conj_set & islvars[n0].eq_set(islvars[n1]) + conj_set = conj_set & var_name_to_pwaff[n0].eq_set(var_name_to_pwaff[n1]) elif op == "ne": - conj_set = conj_set & islvars[n0].ne_set(islvars[n1]) + conj_set = conj_set & var_name_to_pwaff[n0].ne_set(var_name_to_pwaff[n1]) elif op == "lt": - conj_set = conj_set & islvars[n0].lt_set(islvars[n1]) + conj_set = conj_set & var_name_to_pwaff[n0].lt_set(var_name_to_pwaff[n1]) return conj_set From 2ef18166d3a10019520687ad7fdd3d385f857ead Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Mon, 19 Jul 2021 13:06:18 -0500 Subject: [PATCH 442/460] add foldmethod=maker line for vim --- loopy/schedule/checker/__init__.py | 2 ++ loopy/schedule/checker/schedule.py | 2 ++ 2 files changed, 4 insertions(+) diff --git a/loopy/schedule/checker/__init__.py b/loopy/schedule/checker/__init__.py index d24b7a2ea..b994d8768 100644 --- a/loopy/schedule/checker/__init__.py +++ b/loopy/schedule/checker/__init__.py @@ -142,3 +142,5 @@ def get_pairwise_statement_orderings( # }}} # }}} + +# vim: foldmethod=marker diff --git a/loopy/schedule/checker/schedule.py b/loopy/schedule/checker/schedule.py index 85e1c8429..c7c545964 100644 --- a/loopy/schedule/checker/schedule.py +++ b/loopy/schedule/checker/schedule.py @@ -976,3 +976,5 @@ def _get_sched_maps_and_sio( return pairwise_sios # }}} + +# vim: foldmethod=marker From 0e8be5b5f23ca236074911051a432b286908dc1b Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Mon, 19 Jul 2021 13:25:49 -0500 Subject: [PATCH 443/460] clarify comment --- loopy/schedule/checker/schedule.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/loopy/schedule/checker/schedule.py b/loopy/schedule/checker/schedule.py index c7c545964..5940059c9 100644 --- a/loopy/schedule/checker/schedule.py +++ 
b/loopy/schedule/checker/schedule.py @@ -512,7 +512,8 @@ def _gather_blex_ordering_info(sync_kind): slex.TOP: tuple(next_blex_tuple), slex.FIRST: tuple(first_iter_blex_pt), } - # (make sure ^these are copies) + # (copy these three blex points when creating dict because + # the lists will continue to be updated) # Store any new params found blex_order_map_params |= set(lbound.get_var_names(dt.param)) @@ -548,7 +549,8 @@ def _gather_blex_ordering_info(sync_kind): last_iter_blex_pt) blex_exclusion_info[leave_iname][slex.POST] = tuple( next_blex_tuple) - # (make sure ^these are copies) + # (copy these three blex points when creating dict because + # the lists will continue to be updated) # Store any new params found blex_order_map_params |= set(ubound.get_var_names(dt.param)) From d4623c6e539d37a31f31fdc0f1cbb737bcfc859f Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Mon, 19 Jul 2021 18:09:12 -0500 Subject: [PATCH 444/460] promote _gather_blex_ordering_info() to top level (step 1, temporarily keep old version for sanity check) --- loopy/schedule/checker/schedule.py | 374 ++++++++++++++++++++++++++++- 1 file changed, 371 insertions(+), 3 deletions(-) diff --git a/loopy/schedule/checker/schedule.py b/loopy/schedule/checker/schedule.py index 5940059c9..deb53a264 100644 --- a/loopy/schedule/checker/schedule.py +++ b/loopy/schedule/checker/schedule.py @@ -217,6 +217,344 @@ class StatementOrdering: # }}} +# {{{ _gather_blex_ordering_info + +def _gather_blex_ordering_info( + sync_kind, + lin_items, loops_with_barriers, loops_to_ignore, + all_stmt_ids, iname_bounds_pwaff, + all_par_lex_dim_names, gid_lex_dim_names, + ): + """For the given sync_kind ("local" or "global"), create a mapping from + statement instances to blex space (dict), as well as a mapping + defining the blex ordering (isl map from blex space -> blex space) + + Note that, unlike in the intra-thread case, there will be a single + blex ordering map defining the blex ordering for all statement pairs, + rather than 
separate (smaller) lex ordering maps for each pair + """ + from loopy.schedule import (EnterLoop, LeaveLoop, Barrier, RunInstruction) + from loopy.schedule.checker.lexicographic_order_map import ( + create_lex_order_map, + ) + from loopy.schedule.checker.utils import ( + add_and_name_isl_dims, + append_mark_to_strings, + add_eq_isl_constraint_from_names, + ) + slex = SpecialLexPointWRTLoop + + # {{{ First, create map from stmt instances to blex space. + + # At the same time, gather information necessary to create the + # blex ordering map, i.e., for each loop, gather the 6 lex order tuples + # defined above in SpecialLexPointWRTLoop that will be required to + # create sub-maps which will be *excluded* (subtracted) from a standard + # lexicographic ordering in order to create the blex ordering + + stmt_inst_to_blex = {} # Map stmt instances to blex space + iname_to_blex_dim = {} # Map from inames to corresponding blex space dim + blex_exclusion_info = {} # Info for creating maps to exclude from blex order + blex_order_map_params = set() # Params needed in blex order map + n_seq_blex_dims = 1 # Num dims representing sequential order in blex space + next_blex_tuple = [0] # Next tuple of points in blex order + + for lin_item in lin_items: + if isinstance(lin_item, EnterLoop): + enter_iname = lin_item.iname + if enter_iname in loops_with_barriers[sync_kind] - loops_to_ignore: + pre_loop_blex_pt = next_blex_tuple[:] + + # Increment next_blex_tuple[-1] for statements in the section + # of code between this EnterLoop and the matching LeaveLoop. 
+ next_blex_tuple[-1] += 1 + + # Upon entering a loop, add one blex dimension for the loop + # iteration, add second blex dim to enumerate sections of + # code within new loop + next_blex_tuple.append(enter_iname) + next_blex_tuple.append(0) + + # Store 3 tuples that will be used later to create pairs + # that will later be subtracted from the blex order map + lbound = iname_bounds_pwaff[enter_iname][0] + first_iter_blex_pt = next_blex_tuple[:] + first_iter_blex_pt[-2] = lbound + blex_exclusion_info[enter_iname] = { + slex.PRE: tuple(pre_loop_blex_pt), + slex.TOP: tuple(next_blex_tuple), + slex.FIRST: tuple(first_iter_blex_pt), + } + # (copy these three blex points when creating dict because + # the lists will continue to be updated) + + # Store any new params found + blex_order_map_params |= set(lbound.get_var_names(dt.param)) + + elif isinstance(lin_item, LeaveLoop): + leave_iname = lin_item.iname + if leave_iname in loops_with_barriers[sync_kind] - loops_to_ignore: + + # Update max blex dims + n_seq_blex_dims = max(n_seq_blex_dims, len(next_blex_tuple)) + + # Record the blex dim for this loop iname + iname_to_blex_dim[leave_iname] = len(next_blex_tuple)-2 + + # Update next blex pt + pre_end_loop_blex_pt = next_blex_tuple[:] + # Upon leaving a loop: + # - Pop lex dim for enumerating code sections within this loop + # - Pop lex dim for the loop iteration + # - Increment lex dim val enumerating items in current section + next_blex_tuple.pop() + next_blex_tuple.pop() + next_blex_tuple[-1] += 1 + + # Store 3 tuples that will be used later to create pairs + # that will later be subtracted from the blex order map + ubound = iname_bounds_pwaff[leave_iname][1] + last_iter_blex_pt = pre_end_loop_blex_pt[:] + last_iter_blex_pt[-2] = ubound + blex_exclusion_info[leave_iname][slex.BOTTOM] = tuple( + pre_end_loop_blex_pt) + blex_exclusion_info[leave_iname][slex.LAST] = tuple( + last_iter_blex_pt) + blex_exclusion_info[leave_iname][slex.POST] = tuple( + next_blex_tuple) + # 
(copy these three blex points when creating dict because + # the lists will continue to be updated) + + # Store any new params found + blex_order_map_params |= set(ubound.get_var_names(dt.param)) + + elif isinstance(lin_item, RunInstruction): + # Add stmt->blex pair to stmt_inst_to_blex + stmt_inst_to_blex[lin_item.insn_id] = tuple(next_blex_tuple) + + # (Don't increment blex dim val) + + elif isinstance(lin_item, Barrier): + # Increment blex dim val if the sync scope matches + if lin_item.synchronization_kind == sync_kind: + next_blex_tuple[-1] += 1 + + lp_stmt_id = lin_item.originating_insn_id + + if lp_stmt_id is None: + # Barriers without stmt ids were inserted as a result of a + # dependency. They don't themselves have dependencies. + # Don't map this barrier to a blex tuple. + continue + + # This barrier has a stmt id. + # If it was included in listed stmts, process it. + # Otherwise, there's nothing left to do (we've already + # incremented next_blex_tuple if necessary, and this barrier + # does not need to be assigned to a designated point in blex + # time) + if lp_stmt_id in all_stmt_ids: + # If sync scope matches, give this barrier its own point in + # lex time and update blex tuple after barrier. + # Otherwise, add stmt->blex pair to stmt_inst_to_blex, but + # don't update the blex tuple (just like with any other + # stmt) + if lin_item.synchronization_kind == sync_kind: + stmt_inst_to_blex[lp_stmt_id] = tuple(next_blex_tuple) + next_blex_tuple[-1] += 1 + else: + stmt_inst_to_blex[lp_stmt_id] = tuple(next_blex_tuple) + else: + from loopy.schedule import (CallKernel, ReturnFromKernel) + # No action needed for these types of linearization item + assert isinstance( + lin_item, (CallKernel, ReturnFromKernel)) + pass + + blex_order_map_params = sorted(blex_order_map_params) + + # At this point, some blex tuples may have more dimensions than others; + # the missing dims are the fastest-updating dims, and their values should + # be zero. Add them. 
+ for stmt, tup in stmt_inst_to_blex.items(): + stmt_inst_to_blex[stmt] = _pad_tuple_with_zeros(tup, n_seq_blex_dims) + + # }}} + + # {{{ Second, create the blex order map + + # {{{ Create the initial (pre-subtraction) blex order map + + # Create names for the blex dimensions for sequential loops + seq_blex_dim_names = [ + LEX_VAR_PREFIX+str(i) for i in range(n_seq_blex_dims)] + seq_blex_dim_names_prime = append_mark_to_strings( + seq_blex_dim_names, mark=BEFORE_MARK) + + # Begin with the blex order map created as a standard lexicographical order + blex_order_map = create_lex_order_map( + dim_names=seq_blex_dim_names, + in_dim_mark=BEFORE_MARK, + ) + + # Add LID/GID dims to blex order map + blex_order_map = add_and_name_isl_dims( + blex_order_map, dt.out, all_par_lex_dim_names) + blex_order_map = add_and_name_isl_dims( + blex_order_map, dt.in_, + append_mark_to_strings(all_par_lex_dim_names, mark=BEFORE_MARK)) + if sync_kind == "local": + # For intra-group case, constrain GID 'before' to equal GID 'after' + for var_name in gid_lex_dim_names: + blex_order_map = add_eq_isl_constraint_from_names( + blex_order_map, var_name, var_name+BEFORE_MARK) + # (if sync_kind == "global", don't need constraints on LID/GID vars) + + # }}} + + # {{{ Subtract unwanted pairs from happens-before blex map + + # Create map from iname to corresponding blex dim name + iname_to_blex_var = {} + for iname, dim in iname_to_blex_dim.items(): + iname_to_blex_var[iname] = seq_blex_dim_names[dim] + iname_to_blex_var[iname+BEFORE_MARK] = seq_blex_dim_names_prime[dim] + + # Add bounds params needed in blex map + blex_order_map = add_and_name_isl_dims( + blex_order_map, dt.param, blex_order_map_params) + + # Get a set representing blex_order_map space + n_blex_dims = n_seq_blex_dims + len(all_par_lex_dim_names) + blex_set_template = isl.align_spaces( + isl.Map("[ ] -> { [ ] -> [ ] }"), blex_order_map + ).move_dims( + dt.in_, n_blex_dims, dt.out, 0, n_blex_dims + ).domain() + blex_set_affs = 
isl.affs_from_space(blex_set_template.space) + + # {{{ _create_excluded_map_for_iname + + def _create_excluded_map_for_iname(iname, key_lex_tuples): + """Create the blex->blex pairs that must be subtracted from the + initial blex order map for this particular loop using the 6 blex + tuples in the key_lex_tuples: + PRE->FIRST, BOTTOM(iname')->TOP(iname'+1), LAST->POST + """ + + # Note: + # only key_lex_tuples[slex.FIRST] & key_lex_tuples[slex.LAST] are pwaffs + + # {{{ _create_blex_set_from_tuple_pair + + def _create_blex_set_from_tuple_pair(before, after, wrap_cond=False): + """Given a before->after tuple pair in the key_lex_tuples, which may + have dim vals described by ints, strings (inames), and pwaffs, + create an ISL set in blex space that can be converted into + the ISL map to be subtracted + """ + # (Vars from outside func used here: + # iname, blex_set_affs, blex_set_template, iname_to_blex_var, + # n_seq_blex_dims, seq_blex_dim_names, + # seq_blex_dim_names_prime) + + # Start with a set representing blex_order_map space + blex_set = blex_set_template.copy() + + # Add marks to inames in the 'before' tuple + # (all strings should be inames) + before_prime = tuple( + v+BEFORE_MARK if isinstance(v, str) else v for v in before) + before_padded = _pad_tuple_with_zeros(before_prime, n_seq_blex_dims) + after_padded = _pad_tuple_with_zeros(after, n_seq_blex_dims) + + # Assign vals in the tuple to dims in the ISL set + for dim_name, dim_val in zip( + seq_blex_dim_names_prime+seq_blex_dim_names, + before_padded+after_padded): + + if isinstance(dim_val, int): + # Set idx to int val + blex_set &= blex_set_affs[dim_name].eq_set( + blex_set_affs[0]+dim_val) + elif isinstance(dim_val, str): + # This is an iname, set idx to corresponding blex var + blex_set &= blex_set_affs[dim_name].eq_set( + blex_set_affs[iname_to_blex_var[dim_val]]) + else: + # This is a pwaff iname bound, align and intersect + assert isinstance(dim_val, isl.PwAff) + pwaff_aligned = 
isl.align_spaces(dim_val, blex_set_affs[0]) + # (doesn't matter which blex_set_affs item we align to^) + blex_set &= blex_set_affs[dim_name].eq_set(pwaff_aligned) + + if wrap_cond: + # This is the BOTTOM->TOP pair, add condition i = i' + 1 + blex_set &= blex_set_affs[iname_to_blex_var[iname]].eq_set( + blex_set_affs[iname_to_blex_var[iname+BEFORE_MARK]] + 1) + + return blex_set + + # }}} end _create_blex_set_from_tuple_pair() + + # Create pairs to be subtracted + # (set will be converted to map) + + # Enter loop case: PRE->FIRST + full_blex_set = _create_blex_set_from_tuple_pair( + key_lex_tuples[slex.PRE], key_lex_tuples[slex.FIRST]) + # Wrap loop case: BOTTOM(iname')->TOP(iname'+1) + full_blex_set |= _create_blex_set_from_tuple_pair( + key_lex_tuples[slex.BOTTOM], key_lex_tuples[slex.TOP], + wrap_cond=True) + # Leave loop case: LAST->POST + full_blex_set |= _create_blex_set_from_tuple_pair( + key_lex_tuples[slex.LAST], key_lex_tuples[slex.POST]) + + # Add condition to fix iteration value for *surrounding* loops (j = j') + for surrounding_iname in key_lex_tuples[slex.PRE][1::2]: + s_blex_var = iname_to_blex_var[surrounding_iname] + full_blex_set &= blex_set_affs[s_blex_var].eq_set( + blex_set_affs[s_blex_var+BEFORE_MARK]) + + # Convert blex set back to map + return isl.Map.from_domain(full_blex_set).move_dims( + dt.out, 0, dt.in_, n_blex_dims, n_blex_dims) + + # }}} end _create_excluded_map_for_iname() + + # Create map to subtract for each iname + maps_to_subtract = [] + for iname, subdict in blex_exclusion_info.items(): + maps_to_subtract.append(_create_excluded_map_for_iname(iname, subdict)) + + if maps_to_subtract: + + # Get union of maps + map_to_subtract = maps_to_subtract[0] + for other_map in maps_to_subtract[1:]: + map_to_subtract |= other_map + + # Get transitive closure of maps + map_to_subtract, closure_exact = map_to_subtract.transitive_closure() + assert closure_exact # TODO warn instead? 
+ + # Subtract closure from blex order map + blex_order_map = blex_order_map - map_to_subtract + + # }}} + + # }}} + + return ( + stmt_inst_to_blex, # map stmt instances to blex space + blex_order_map, + seq_blex_dim_names, + ) + +# }}} + + # {{{ get_pairwise_statement_orderings_inner def get_pairwise_statement_orderings_inner( @@ -461,7 +799,7 @@ def get_pairwise_statement_orderings_inner( # {{{ _gather_blex_ordering_info(sync_kind): gather blex info for sync_kind - def _gather_blex_ordering_info(sync_kind): + def _gather_blex_ordering_info_orig(sync_kind): """For the given sync_kind ("local" or "global"), create a mapping from statement instances to blex space (dict), as well as a mapping defining the blex ordering (isl map from blex space -> blex space) @@ -781,14 +1119,44 @@ def _create_blex_set_from_tuple_pair(before, after, wrap_cond=False): # }}} end _gather_blex_ordering_info(sync_kind) + # TODO remove old function call after comparing results for sanity check + + # Get the blex schedule blueprint (dict will become a map below) and + # blex order map w.r.t. local and global barriers + (_stmt_inst_to_lblex, + _lblex_order_map, + _seq_lblex_dim_names) = _gather_blex_ordering_info_orig("local") + (_stmt_inst_to_gblex, + _gblex_order_map, + _seq_gblex_dim_names) = _gather_blex_ordering_info_orig("global") + # Get the blex schedule blueprint (dict will become a map below) and # blex order map w.r.t. 
local and global barriers (stmt_inst_to_lblex, lblex_order_map, - seq_lblex_dim_names) = _gather_blex_ordering_info("local") + seq_lblex_dim_names) = _gather_blex_ordering_info( + "local", + lin_items, loops_with_barriers, loops_to_ignore, + all_stmt_ids, iname_bounds_pwaff, + all_par_lex_dim_names, gid_lex_dim_names, + ) (stmt_inst_to_gblex, gblex_order_map, - seq_gblex_dim_names) = _gather_blex_ordering_info("global") + seq_gblex_dim_names) = _gather_blex_ordering_info( + "global", + lin_items, loops_with_barriers, loops_to_ignore, + all_stmt_ids, iname_bounds_pwaff, + all_par_lex_dim_names, gid_lex_dim_names, + ) + + assert _stmt_inst_to_lblex == stmt_inst_to_lblex + assert _lblex_order_map == lblex_order_map + assert _lblex_order_map.get_var_dict() == lblex_order_map.get_var_dict() + assert _seq_lblex_dim_names == seq_lblex_dim_names + assert _stmt_inst_to_gblex == stmt_inst_to_gblex + assert _gblex_order_map == gblex_order_map + assert _gblex_order_map.get_var_dict() == gblex_order_map.get_var_dict() + assert _seq_gblex_dim_names == seq_gblex_dim_names # }}} From 6fab4bc3084ac922c07adf3af971889da54de8ee Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Mon, 19 Jul 2021 18:15:19 -0500 Subject: [PATCH 445/460] remove sanity check and old version of _gather_blex_ordering_info() after promoting func to top level --- loopy/schedule/checker/schedule.py | 343 ----------------------------- 1 file changed, 343 deletions(-) diff --git a/loopy/schedule/checker/schedule.py b/loopy/schedule/checker/schedule.py index deb53a264..f9dad40e1 100644 --- a/loopy/schedule/checker/schedule.py +++ b/loopy/schedule/checker/schedule.py @@ -622,7 +622,6 @@ def get_pairwise_statement_orderings_inner( create_symbolic_map_from_tuples, insert_and_name_isl_dims, ) - slex = SpecialLexPointWRTLoop all_stmt_ids = set().union(*stmt_id_pairs) @@ -797,339 +796,6 @@ def get_pairwise_statement_orderings_inner( all_par_lex_dim_names = lid_lex_dim_names + gid_lex_dim_names - # {{{ 
_gather_blex_ordering_info(sync_kind): gather blex info for sync_kind - - def _gather_blex_ordering_info_orig(sync_kind): - """For the given sync_kind ("local" or "global"), create a mapping from - statement instances to blex space (dict), as well as a mapping - defining the blex ordering (isl map from blex space -> blex space) - - Note that, unlike in the intra-thread case, there will be a single - blex ordering map defining the blex ordering for all statement pairs, - rather than separate (smaller) lex ordering maps for each pair - """ - - # {{{ First, create map from stmt instances to blex space. - - # At the same time, gather information necessary to create the - # blex ordering map, i.e., for each loop, gather the 6 lex order tuples - # defined above in SpecialLexPointWRTLoop that will be required to - # create sub-maps which will be *excluded* (subtracted) from a standard - # lexicographic ordering in order to create the blex ordering - - stmt_inst_to_blex = {} # Map stmt instances to blex space - iname_to_blex_dim = {} # Map from inames to corresponding blex space dim - blex_exclusion_info = {} # Info for creating maps to exclude from blex order - blex_order_map_params = set() # Params needed in blex order map - n_seq_blex_dims = 1 # Num dims representing sequential order in blex space - next_blex_tuple = [0] # Next tuple of points in blex order - - for lin_item in lin_items: - if isinstance(lin_item, EnterLoop): - enter_iname = lin_item.iname - if enter_iname in loops_with_barriers[sync_kind] - loops_to_ignore: - pre_loop_blex_pt = next_blex_tuple[:] - - # Increment next_blex_tuple[-1] for statements in the section - # of code between this EnterLoop and the matching LeaveLoop. 
- next_blex_tuple[-1] += 1 - - # Upon entering a loop, add one blex dimension for the loop - # iteration, add second blex dim to enumerate sections of - # code within new loop - next_blex_tuple.append(enter_iname) - next_blex_tuple.append(0) - - # Store 3 tuples that will be used later to create pairs - # that will later be subtracted from the blex order map - lbound = iname_bounds_pwaff[enter_iname][0] - first_iter_blex_pt = next_blex_tuple[:] - first_iter_blex_pt[-2] = lbound - blex_exclusion_info[enter_iname] = { - slex.PRE: tuple(pre_loop_blex_pt), - slex.TOP: tuple(next_blex_tuple), - slex.FIRST: tuple(first_iter_blex_pt), - } - # (copy these three blex points when creating dict because - # the lists will continue to be updated) - - # Store any new params found - blex_order_map_params |= set(lbound.get_var_names(dt.param)) - - elif isinstance(lin_item, LeaveLoop): - leave_iname = lin_item.iname - if leave_iname in loops_with_barriers[sync_kind] - loops_to_ignore: - - # Update max blex dims - n_seq_blex_dims = max(n_seq_blex_dims, len(next_blex_tuple)) - - # Record the blex dim for this loop iname - iname_to_blex_dim[leave_iname] = len(next_blex_tuple)-2 - - # Update next blex pt - pre_end_loop_blex_pt = next_blex_tuple[:] - # Upon leaving a loop: - # - Pop lex dim for enumerating code sections within this loop - # - Pop lex dim for the loop iteration - # - Increment lex dim val enumerating items in current section - next_blex_tuple.pop() - next_blex_tuple.pop() - next_blex_tuple[-1] += 1 - - # Store 3 tuples that will be used later to create pairs - # that will later be subtracted from the blex order map - ubound = iname_bounds_pwaff[leave_iname][1] - last_iter_blex_pt = pre_end_loop_blex_pt[:] - last_iter_blex_pt[-2] = ubound - blex_exclusion_info[leave_iname][slex.BOTTOM] = tuple( - pre_end_loop_blex_pt) - blex_exclusion_info[leave_iname][slex.LAST] = tuple( - last_iter_blex_pt) - blex_exclusion_info[leave_iname][slex.POST] = tuple( - next_blex_tuple) - # 
(copy these three blex points when creating dict because - # the lists will continue to be updated) - - # Store any new params found - blex_order_map_params |= set(ubound.get_var_names(dt.param)) - - elif isinstance(lin_item, RunInstruction): - # Add stmt->blex pair to stmt_inst_to_blex - stmt_inst_to_blex[lin_item.insn_id] = tuple(next_blex_tuple) - - # (Don't increment blex dim val) - - elif isinstance(lin_item, Barrier): - # Increment blex dim val if the sync scope matches - if lin_item.synchronization_kind == sync_kind: - next_blex_tuple[-1] += 1 - - lp_stmt_id = lin_item.originating_insn_id - - if lp_stmt_id is None: - # Barriers without stmt ids were inserted as a result of a - # dependency. They don't themselves have dependencies. - # Don't map this barrier to a blex tuple. - continue - - # This barrier has a stmt id. - # If it was included in listed stmts, process it. - # Otherwise, there's nothing left to do (we've already - # incremented next_blex_tuple if necessary, and this barrier - # does not need to be assigned to a designated point in blex - # time) - if lp_stmt_id in all_stmt_ids: - # If sync scope matches, give this barrier its own point in - # lex time and update blex tuple after barrier. - # Otherwise, add stmt->blex pair to stmt_inst_to_blex, but - # don't update the blex tuple (just like with any other - # stmt) - if lin_item.synchronization_kind == sync_kind: - stmt_inst_to_blex[lp_stmt_id] = tuple(next_blex_tuple) - next_blex_tuple[-1] += 1 - else: - stmt_inst_to_blex[lp_stmt_id] = tuple(next_blex_tuple) - else: - from loopy.schedule import (CallKernel, ReturnFromKernel) - # No action needed for these types of linearization item - assert isinstance( - lin_item, (CallKernel, ReturnFromKernel)) - pass - - blex_order_map_params = sorted(blex_order_map_params) - - # At this point, some blex tuples may have more dimensions than others; - # the missing dims are the fastest-updating dims, and their values should - # be zero. Add them. 
- for stmt, tup in stmt_inst_to_blex.items(): - stmt_inst_to_blex[stmt] = _pad_tuple_with_zeros(tup, n_seq_blex_dims) - - # }}} - - # {{{ Second, create the blex order map - - # {{{ Create the initial (pre-subtraction) blex order map - - # Create names for the blex dimensions for sequential loops - seq_blex_dim_names = [ - LEX_VAR_PREFIX+str(i) for i in range(n_seq_blex_dims)] - seq_blex_dim_names_prime = append_mark_to_strings( - seq_blex_dim_names, mark=BEFORE_MARK) - - # Begin with the blex order map created as a standard lexicographical order - blex_order_map = create_lex_order_map( - dim_names=seq_blex_dim_names, - in_dim_mark=BEFORE_MARK, - ) - - # Add LID/GID dims to blex order map - blex_order_map = add_and_name_isl_dims( - blex_order_map, dt.out, all_par_lex_dim_names) - blex_order_map = add_and_name_isl_dims( - blex_order_map, dt.in_, - append_mark_to_strings(all_par_lex_dim_names, mark=BEFORE_MARK)) - if sync_kind == "local": - # For intra-group case, constrain GID 'before' to equal GID 'after' - for var_name in gid_lex_dim_names: - blex_order_map = add_eq_isl_constraint_from_names( - blex_order_map, var_name, var_name+BEFORE_MARK) - # (if sync_kind == "global", don't need constraints on LID/GID vars) - - # }}} - - # {{{ Subtract unwanted pairs from happens-before blex map - - # Create map from iname to corresponding blex dim name - iname_to_blex_var = {} - for iname, dim in iname_to_blex_dim.items(): - iname_to_blex_var[iname] = seq_blex_dim_names[dim] - iname_to_blex_var[iname+BEFORE_MARK] = seq_blex_dim_names_prime[dim] - - # Add bounds params needed in blex map - blex_order_map = add_and_name_isl_dims( - blex_order_map, dt.param, blex_order_map_params) - - # Get a set representing blex_order_map space - n_blex_dims = n_seq_blex_dims + len(all_par_lex_dim_names) - blex_set_template = isl.align_spaces( - isl.Map("[ ] -> { [ ] -> [ ] }"), blex_order_map - ).move_dims( - dt.in_, n_blex_dims, dt.out, 0, n_blex_dims - ).domain() - blex_set_affs = 
isl.affs_from_space(blex_set_template.space) - - # {{{ _create_excluded_map_for_iname - - def _create_excluded_map_for_iname(iname, key_lex_tuples): - """Create the blex->blex pairs that must be subtracted from the - initial blex order map for this particular loop using the 6 blex - tuples in the key_lex_tuples: - PRE->FIRST, BOTTOM(iname')->TOP(iname'+1), LAST->POST - """ - - # Note: - # only key_lex_tuples[slex.FIRST] & key_lex_tuples[slex.LAST] are pwaffs - - # {{{ _create_blex_set_from_tuple_pair - - def _create_blex_set_from_tuple_pair(before, after, wrap_cond=False): - """Given a before->after tuple pair in the key_lex_tuples, which may - have dim vals described by ints, strings (inames), and pwaffs, - create an ISL set in blex space that can be converted into - the ISL map to be subtracted - """ - # (Vars from outside func used here: - # iname, blex_set_affs, blex_set_template, iname_to_blex_var, - # n_seq_blex_dims, seq_blex_dim_names, - # seq_blex_dim_names_prime) - - # Start with a set representing blex_order_map space - blex_set = blex_set_template.copy() - - # Add marks to inames in the 'before' tuple - # (all strings should be inames) - before_prime = tuple( - v+BEFORE_MARK if isinstance(v, str) else v for v in before) - before_padded = _pad_tuple_with_zeros(before_prime, n_seq_blex_dims) - after_padded = _pad_tuple_with_zeros(after, n_seq_blex_dims) - - # Assign vals in the tuple to dims in the ISL set - for dim_name, dim_val in zip( - seq_blex_dim_names_prime+seq_blex_dim_names, - before_padded+after_padded): - - if isinstance(dim_val, int): - # Set idx to int val - blex_set &= blex_set_affs[dim_name].eq_set( - blex_set_affs[0]+dim_val) - elif isinstance(dim_val, str): - # This is an iname, set idx to corresponding blex var - blex_set &= blex_set_affs[dim_name].eq_set( - blex_set_affs[iname_to_blex_var[dim_val]]) - else: - # This is a pwaff iname bound, align and intersect - assert isinstance(dim_val, isl.PwAff) - pwaff_aligned = 
isl.align_spaces(dim_val, blex_set_affs[0]) - # (doesn't matter which blex_set_affs item we align to^) - blex_set &= blex_set_affs[dim_name].eq_set(pwaff_aligned) - - if wrap_cond: - # This is the BOTTOM->TOP pair, add condition i = i' + 1 - blex_set &= blex_set_affs[iname_to_blex_var[iname]].eq_set( - blex_set_affs[iname_to_blex_var[iname+BEFORE_MARK]] + 1) - - return blex_set - - # }}} end _create_blex_set_from_tuple_pair() - - # Create pairs to be subtracted - # (set will be converted to map) - - # Enter loop case: PRE->FIRST - full_blex_set = _create_blex_set_from_tuple_pair( - key_lex_tuples[slex.PRE], key_lex_tuples[slex.FIRST]) - # Wrap loop case: BOTTOM(iname')->TOP(iname'+1) - full_blex_set |= _create_blex_set_from_tuple_pair( - key_lex_tuples[slex.BOTTOM], key_lex_tuples[slex.TOP], - wrap_cond=True) - # Leave loop case: LAST->POST - full_blex_set |= _create_blex_set_from_tuple_pair( - key_lex_tuples[slex.LAST], key_lex_tuples[slex.POST]) - - # Add condition to fix iteration value for *surrounding* loops (j = j') - for surrounding_iname in key_lex_tuples[slex.PRE][1::2]: - s_blex_var = iname_to_blex_var[surrounding_iname] - full_blex_set &= blex_set_affs[s_blex_var].eq_set( - blex_set_affs[s_blex_var+BEFORE_MARK]) - - # Convert blex set back to map - return isl.Map.from_domain(full_blex_set).move_dims( - dt.out, 0, dt.in_, n_blex_dims, n_blex_dims) - - # }}} end _create_excluded_map_for_iname() - - # Create map to subtract for each iname - maps_to_subtract = [] - for iname, subdict in blex_exclusion_info.items(): - maps_to_subtract.append(_create_excluded_map_for_iname(iname, subdict)) - - if maps_to_subtract: - - # Get union of maps - map_to_subtract = maps_to_subtract[0] - for other_map in maps_to_subtract[1:]: - map_to_subtract |= other_map - - # Get transitive closure of maps - map_to_subtract, closure_exact = map_to_subtract.transitive_closure() - assert closure_exact # TODO warn instead? 
- - # Subtract closure from blex order map - blex_order_map = blex_order_map - map_to_subtract - - # }}} - - # }}} - - return ( - stmt_inst_to_blex, # map stmt instances to blex space - blex_order_map, - seq_blex_dim_names, - ) - - # }}} end _gather_blex_ordering_info(sync_kind) - - # TODO remove old function call after comparing results for sanity check - - # Get the blex schedule blueprint (dict will become a map below) and - # blex order map w.r.t. local and global barriers - (_stmt_inst_to_lblex, - _lblex_order_map, - _seq_lblex_dim_names) = _gather_blex_ordering_info_orig("local") - (_stmt_inst_to_gblex, - _gblex_order_map, - _seq_gblex_dim_names) = _gather_blex_ordering_info_orig("global") - # Get the blex schedule blueprint (dict will become a map below) and # blex order map w.r.t. local and global barriers (stmt_inst_to_lblex, @@ -1149,15 +815,6 @@ def _create_blex_set_from_tuple_pair(before, after, wrap_cond=False): all_par_lex_dim_names, gid_lex_dim_names, ) - assert _stmt_inst_to_lblex == stmt_inst_to_lblex - assert _lblex_order_map == lblex_order_map - assert _lblex_order_map.get_var_dict() == lblex_order_map.get_var_dict() - assert _seq_lblex_dim_names == seq_lblex_dim_names - assert _stmt_inst_to_gblex == stmt_inst_to_gblex - assert _gblex_order_map == gblex_order_map - assert _gblex_order_map.get_var_dict() == gblex_order_map.get_var_dict() - assert _seq_gblex_dim_names == seq_gblex_dim_names - # }}} # }}} end intra-group and global blex order creation From 109d34fbf2de231fd7f78a0979d40548d534f2ed Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Tue, 20 Jul 2021 14:32:46 -0500 Subject: [PATCH 446/460] eliminate _create_excluded_map_for_iname() since it is only called once; make it inline instead (step 1, temporarily keep old func for sanity check) --- loopy/schedule/checker/schedule.py | 105 ++++++++++++++++++++++++++++- 1 file changed, 103 insertions(+), 2 deletions(-) diff --git a/loopy/schedule/checker/schedule.py b/loopy/schedule/checker/schedule.py 
index f9dad40e1..23acf15f5 100644 --- a/loopy/schedule/checker/schedule.py +++ b/loopy/schedule/checker/schedule.py @@ -441,6 +441,10 @@ def _create_excluded_map_for_iname(iname, key_lex_tuples): tuples in the key_lex_tuples: PRE->FIRST, BOTTOM(iname')->TOP(iname'+1), LAST->POST """ + # (Vars from outside func used here: + # blex_set_affs, blex_set_template, iname_to_blex_var, + # n_seq_blex_dims, seq_blex_dim_names, + # seq_blex_dim_names_prime) # Note: # only key_lex_tuples[slex.FIRST] & key_lex_tuples[slex.LAST] are pwaffs @@ -525,8 +529,104 @@ def _create_blex_set_from_tuple_pair(before, after, wrap_cond=False): # Create map to subtract for each iname maps_to_subtract = [] - for iname, subdict in blex_exclusion_info.items(): - maps_to_subtract.append(_create_excluded_map_for_iname(iname, subdict)) + for iname, key_lex_tuples in blex_exclusion_info.items(): + # TODO remove after sanity check + _old_map_to_subtract = _create_excluded_map_for_iname(iname, key_lex_tuples) + + # {{{ _create_excluded_map_for_iname + + """Create the blex->blex pairs that must be subtracted from the + initial blex order map for this particular loop using the 6 blex + tuples in the key_lex_tuples: + PRE->FIRST, BOTTOM(iname')->TOP(iname'+1), LAST->POST + """ + + # Note: + # only key_lex_tuples[slex.FIRST] & key_lex_tuples[slex.LAST] are pwaffs + + # {{{ _create_blex_set_from_tuple_pair + + def _create_blex_set_from_tuple_pair(before, after, wrap_cond=False): + """Given a before->after tuple pair in the key_lex_tuples, which may + have dim vals described by ints, strings (inames), and pwaffs, + create an ISL set in blex space that can be converted into + the ISL map to be subtracted + """ + # (Vars from outside func used here: + # iname, blex_set_affs, blex_set_template, iname_to_blex_var, + # n_seq_blex_dims, seq_blex_dim_names, + # seq_blex_dim_names_prime) + + # Start with a set representing blex_order_map space + blex_set = blex_set_template.copy() + + # Add marks to inames in the 
'before' tuple + # (all strings should be inames) + before_prime = tuple( + v+BEFORE_MARK if isinstance(v, str) else v for v in before) + before_padded = _pad_tuple_with_zeros(before_prime, n_seq_blex_dims) + after_padded = _pad_tuple_with_zeros(after, n_seq_blex_dims) + + # Assign vals in the tuple to dims in the ISL set + for dim_name, dim_val in zip( + seq_blex_dim_names_prime+seq_blex_dim_names, + before_padded+after_padded): + + if isinstance(dim_val, int): + # Set idx to int val + blex_set &= blex_set_affs[dim_name].eq_set( + blex_set_affs[0]+dim_val) + elif isinstance(dim_val, str): + # This is an iname, set idx to corresponding blex var + blex_set &= blex_set_affs[dim_name].eq_set( + blex_set_affs[iname_to_blex_var[dim_val]]) + else: + # This is a pwaff iname bound, align and intersect + assert isinstance(dim_val, isl.PwAff) + pwaff_aligned = isl.align_spaces(dim_val, blex_set_affs[0]) + # (doesn't matter which blex_set_affs item we align to^) + blex_set &= blex_set_affs[dim_name].eq_set(pwaff_aligned) + + if wrap_cond: + # This is the BOTTOM->TOP pair, add condition i = i' + 1 + blex_set &= blex_set_affs[iname_to_blex_var[iname]].eq_set( + blex_set_affs[iname_to_blex_var[iname+BEFORE_MARK]] + 1) + + return blex_set + + # }}} end _create_blex_set_from_tuple_pair() + + # Create pairs to be subtracted + # (set will be converted to map) + + # Enter loop case: PRE->FIRST + full_blex_set = _create_blex_set_from_tuple_pair( + key_lex_tuples[slex.PRE], key_lex_tuples[slex.FIRST]) + # Wrap loop case: BOTTOM(iname')->TOP(iname'+1) + full_blex_set |= _create_blex_set_from_tuple_pair( + key_lex_tuples[slex.BOTTOM], key_lex_tuples[slex.TOP], + wrap_cond=True) + # Leave loop case: LAST->POST + full_blex_set |= _create_blex_set_from_tuple_pair( + key_lex_tuples[slex.LAST], key_lex_tuples[slex.POST]) + + # Add condition to fix iteration value for *surrounding* loops (j = j') + for surrounding_iname in key_lex_tuples[slex.PRE][1::2]: + s_blex_var = 
iname_to_blex_var[surrounding_iname] + full_blex_set &= blex_set_affs[s_blex_var].eq_set( + blex_set_affs[s_blex_var+BEFORE_MARK]) + + # Convert blex set back to map + map_to_subtract = isl.Map.from_domain(full_blex_set).move_dims( + dt.out, 0, dt.in_, n_blex_dims, n_blex_dims) + + # }}} end _create_excluded_map_for_iname() + + # TODO remove sanity check + assert map_to_subtract == _old_map_to_subtract + assert map_to_subtract.get_var_dict() == _old_map_to_subtract.get_var_dict() + + maps_to_subtract.append(map_to_subtract) if maps_to_subtract: @@ -537,6 +637,7 @@ def _create_blex_set_from_tuple_pair(before, after, wrap_cond=False): # Get transitive closure of maps map_to_subtract, closure_exact = map_to_subtract.transitive_closure() + assert closure_exact # TODO warn instead? # Subtract closure from blex order map From aa2c475f8b407cf965182758c7d7ca38e40d67eb Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Tue, 20 Jul 2021 14:45:24 -0500 Subject: [PATCH 447/460] eliminate _create_excluded_map_for_iname() since it is only called once; make it inline instead (step 2, after finishing sanity check) --- loopy/schedule/checker/schedule.py | 115 +++-------------------------- 1 file changed, 10 insertions(+), 105 deletions(-) diff --git a/loopy/schedule/checker/schedule.py b/loopy/schedule/checker/schedule.py index 23acf15f5..ab9af51df 100644 --- a/loopy/schedule/checker/schedule.py +++ b/loopy/schedule/checker/schedule.py @@ -433,111 +433,16 @@ def _gather_blex_ordering_info( ).domain() blex_set_affs = isl.affs_from_space(blex_set_template.space) - # {{{ _create_excluded_map_for_iname + # {{{ Create blex map to subtract for each iname in blex_exclusion_info - def _create_excluded_map_for_iname(iname, key_lex_tuples): - """Create the blex->blex pairs that must be subtracted from the - initial blex order map for this particular loop using the 6 blex - tuples in the key_lex_tuples: - PRE->FIRST, BOTTOM(iname')->TOP(iname'+1), LAST->POST - """ - # (Vars from outside func used 
here: - # blex_set_affs, blex_set_template, iname_to_blex_var, - # n_seq_blex_dims, seq_blex_dim_names, - # seq_blex_dim_names_prime) - - # Note: - # only key_lex_tuples[slex.FIRST] & key_lex_tuples[slex.LAST] are pwaffs - - # {{{ _create_blex_set_from_tuple_pair - - def _create_blex_set_from_tuple_pair(before, after, wrap_cond=False): - """Given a before->after tuple pair in the key_lex_tuples, which may - have dim vals described by ints, strings (inames), and pwaffs, - create an ISL set in blex space that can be converted into - the ISL map to be subtracted - """ - # (Vars from outside func used here: - # iname, blex_set_affs, blex_set_template, iname_to_blex_var, - # n_seq_blex_dims, seq_blex_dim_names, - # seq_blex_dim_names_prime) - - # Start with a set representing blex_order_map space - blex_set = blex_set_template.copy() - - # Add marks to inames in the 'before' tuple - # (all strings should be inames) - before_prime = tuple( - v+BEFORE_MARK if isinstance(v, str) else v for v in before) - before_padded = _pad_tuple_with_zeros(before_prime, n_seq_blex_dims) - after_padded = _pad_tuple_with_zeros(after, n_seq_blex_dims) - - # Assign vals in the tuple to dims in the ISL set - for dim_name, dim_val in zip( - seq_blex_dim_names_prime+seq_blex_dim_names, - before_padded+after_padded): - - if isinstance(dim_val, int): - # Set idx to int val - blex_set &= blex_set_affs[dim_name].eq_set( - blex_set_affs[0]+dim_val) - elif isinstance(dim_val, str): - # This is an iname, set idx to corresponding blex var - blex_set &= blex_set_affs[dim_name].eq_set( - blex_set_affs[iname_to_blex_var[dim_val]]) - else: - # This is a pwaff iname bound, align and intersect - assert isinstance(dim_val, isl.PwAff) - pwaff_aligned = isl.align_spaces(dim_val, blex_set_affs[0]) - # (doesn't matter which blex_set_affs item we align to^) - blex_set &= blex_set_affs[dim_name].eq_set(pwaff_aligned) - - if wrap_cond: - # This is the BOTTOM->TOP pair, add condition i = i' + 1 - blex_set &= 
blex_set_affs[iname_to_blex_var[iname]].eq_set( - blex_set_affs[iname_to_blex_var[iname+BEFORE_MARK]] + 1) - - return blex_set - - # }}} end _create_blex_set_from_tuple_pair() - - # Create pairs to be subtracted - # (set will be converted to map) - - # Enter loop case: PRE->FIRST - full_blex_set = _create_blex_set_from_tuple_pair( - key_lex_tuples[slex.PRE], key_lex_tuples[slex.FIRST]) - # Wrap loop case: BOTTOM(iname')->TOP(iname'+1) - full_blex_set |= _create_blex_set_from_tuple_pair( - key_lex_tuples[slex.BOTTOM], key_lex_tuples[slex.TOP], - wrap_cond=True) - # Leave loop case: LAST->POST - full_blex_set |= _create_blex_set_from_tuple_pair( - key_lex_tuples[slex.LAST], key_lex_tuples[slex.POST]) - - # Add condition to fix iteration value for *surrounding* loops (j = j') - for surrounding_iname in key_lex_tuples[slex.PRE][1::2]: - s_blex_var = iname_to_blex_var[surrounding_iname] - full_blex_set &= blex_set_affs[s_blex_var].eq_set( - blex_set_affs[s_blex_var+BEFORE_MARK]) - - # Convert blex set back to map - return isl.Map.from_domain(full_blex_set).move_dims( - dt.out, 0, dt.in_, n_blex_dims, n_blex_dims) - - # }}} end _create_excluded_map_for_iname() - - # Create map to subtract for each iname maps_to_subtract = [] for iname, key_lex_tuples in blex_exclusion_info.items(): - # TODO remove after sanity check - _old_map_to_subtract = _create_excluded_map_for_iname(iname, key_lex_tuples) - # {{{ _create_excluded_map_for_iname + # {{{ Create blex map to subract for one iname """Create the blex->blex pairs that must be subtracted from the initial blex order map for this particular loop using the 6 blex - tuples in the key_lex_tuples: + tuples in key_lex_tuples: PRE->FIRST, BOTTOM(iname')->TOP(iname'+1), LAST->POST """ @@ -620,14 +525,14 @@ def _create_blex_set_from_tuple_pair(before, after, wrap_cond=False): map_to_subtract = isl.Map.from_domain(full_blex_set).move_dims( dt.out, 0, dt.in_, n_blex_dims, n_blex_dims) - # }}} end _create_excluded_map_for_iname() - - # 
TODO remove sanity check - assert map_to_subtract == _old_map_to_subtract - assert map_to_subtract.get_var_dict() == _old_map_to_subtract.get_var_dict() + # }}} maps_to_subtract.append(map_to_subtract) + # }}} + + # {{{ Subtract transitive closure of union of blex maps to subtract + if maps_to_subtract: # Get union of maps @@ -636,12 +541,12 @@ def _create_blex_set_from_tuple_pair(before, after, wrap_cond=False): map_to_subtract |= other_map # Get transitive closure of maps - map_to_subtract, closure_exact = map_to_subtract.transitive_closure() + map_to_subtract_closure, closure_exact = map_to_subtract.transitive_closure() assert closure_exact # TODO warn instead? # Subtract closure from blex order map - blex_order_map = blex_order_map - map_to_subtract + blex_order_map = blex_order_map - map_to_subtract_closure # }}} From 7faa802c92809bddf351fbbe2d549683eb932d5b Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Wed, 21 Jul 2021 13:42:35 -0500 Subject: [PATCH 448/460] add for_each_kernel decorator to map_domain --- loopy/transform/iname.py | 1 + 1 file changed, 1 insertion(+) diff --git a/loopy/transform/iname.py b/loopy/transform/iname.py index be7a57d3f..81b253feb 100644 --- a/loopy/transform/iname.py +++ b/loopy/transform/iname.py @@ -1941,6 +1941,7 @@ def _find_aff_subst_from_map(iname, isl_map): # TODO swap dt and dim_type +@for_each_kernel def map_domain(kernel, isl_map, within=None, rename_after={}): # FIXME: Express _split_iname_backend in terms of this # Missing/deleted for now: From a5bdc98f63a271d769d97ebeda1c785faf32db5a Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Wed, 21 Jul 2021 13:42:54 -0500 Subject: [PATCH 449/460] after callables update, use knl[loopy_kernel] where needed --- test/test_transform.py | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/test/test_transform.py b/test/test_transform.py index cda729da8..6d6ad6e02 100644 --- a/test/test_transform.py +++ b/test/test_transform.py @@ -609,7 +609,6 @@ def 
test_map_domain_vs_split_iname(): c[x,t] = d[x,t] {id=stmtc} e[i] = f[i] """, - name="wave_equation", lang_version=(2018, 2), ) knl = lp.add_and_infer_dtypes(knl, {"b,d,f": np.float32}) @@ -638,7 +637,8 @@ def test_map_domain_vs_split_iname(): # Get a linearization proc_knl_map_dom = lp.preprocess_kernel(knl_map_dom) - lin_knl_map_dom = lp.get_one_linearized_kernel(proc_knl_map_dom) + lin_knl_map_dom = lp.get_one_linearized_kernel( + proc_knl_map_dom["loopy_kernel"], proc_knl_map_dom.callables_table) # }}} @@ -648,13 +648,15 @@ def test_map_domain_vs_split_iname(): knl_split_iname = lp.split_iname(knl_split_iname, "t", 32) knl_split_iname = lp.prioritize_loops(knl_split_iname, "x, t_outer, t_inner") proc_knl_split_iname = lp.preprocess_kernel(knl_split_iname) - lin_knl_split_iname = lp.get_one_linearized_kernel(proc_knl_split_iname) + lin_knl_split_iname = lp.get_one_linearized_kernel( + proc_knl_split_iname["loopy_kernel"], proc_knl_split_iname.callables_table) from loopy.schedule.checker.utils import ( ensure_dim_names_match_and_align, ) for d_map_domain, d_split_iname in zip( - knl_map_dom.domains, knl_split_iname.domains): + knl_map_dom["loopy_kernel"].domains, + knl_split_iname["loopy_kernel"].domains): d_map_domain_aligned = ensure_dim_names_match_and_align( d_map_domain, d_split_iname) assert d_map_domain_aligned == d_split_iname @@ -724,7 +726,8 @@ def test_map_domain_with_transform_map_missing_dims(): # Get a linearization proc_knl_map_dom = lp.preprocess_kernel(knl_map_dom) - lin_knl_map_dom = lp.get_one_linearized_kernel(proc_knl_map_dom) + lin_knl_map_dom = lp.get_one_linearized_kernel( + proc_knl_map_dom["loopy_kernel"], proc_knl_map_dom.callables_table) # }}} @@ -739,13 +742,15 @@ def test_map_domain_with_transform_map_missing_dims(): except AttributeError: knl_split_iname = lp.prioritize_loops(knl_split_iname, desired_prio) proc_knl_split_iname = lp.preprocess_kernel(knl_split_iname) - lin_knl_split_iname = 
lp.get_one_linearized_kernel(proc_knl_split_iname) + lin_knl_split_iname = lp.get_one_linearized_kernel( + proc_knl_split_iname["loopy_kernel"], proc_knl_split_iname.callables_table) from loopy.schedule.checker.utils import ( ensure_dim_names_match_and_align, ) for d_map_domain, d_split_iname in zip( - knl_map_dom.domains, knl_split_iname.domains): + knl_map_dom["loopy_kernel"].domains, + knl_split_iname["loopy_kernel"].domains): d_map_domain_aligned = ensure_dim_names_match_and_align( d_map_domain, d_split_iname) assert d_map_domain_aligned == d_split_iname From dd7cbea5e7dfa0095cfd9c7f4f22bc3d1ad3cce6 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Wed, 21 Jul 2021 13:57:28 -0500 Subject: [PATCH 450/460] remove rename_after arg in map_domain, which is no longer necessary due to previous map machinery update --- loopy/transform/iname.py | 20 +------------------- 1 file changed, 1 insertion(+), 19 deletions(-) diff --git a/loopy/transform/iname.py b/loopy/transform/iname.py index 81b253feb..fc01f75ab 100644 --- a/loopy/transform/iname.py +++ b/loopy/transform/iname.py @@ -1942,7 +1942,7 @@ def _find_aff_subst_from_map(iname, isl_map): # TODO swap dt and dim_type @for_each_kernel -def map_domain(kernel, isl_map, within=None, rename_after={}): +def map_domain(kernel, isl_map, within=None): # FIXME: Express _split_iname_backend in terms of this # Missing/deleted for now: # - slab processing @@ -2139,24 +2139,6 @@ def process_set(s): kernel = ins.map_kernel(kernel) kernel = rule_mapping_context.finish_kernel(kernel) - # {{{ Rename inames according to rename_after dict - - # This renaming option exists because various isl operations fail when map - # dim names are not unique, so even if someone wants their transformation - # map to keep one of the inames unchanged, they must give it a new name - # in their map, e.g., "[x, t] -> [x_, t_outer, t_inner] : x_ = x ..." (see - # test_map_domain_vs_split_iname()). 
Currently, they can't - # simply exclude that iname from the transformation map because, as stated - # in the error above, all domains must either involve all or none of the - # transform map domain inames. This renaming option lets them, e.g. switch - # an iname back to its original name. - - # TODO come up with better solution for this - for old_iname, new_iname in rename_after.items(): - kernel = rename_iname(kernel, old_iname, new_iname, within=within) - - # }}} - return kernel # }}} From 4052fb94ff273b0e9927e63500e2e61185b462d8 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Wed, 21 Jul 2021 13:58:14 -0500 Subject: [PATCH 451/460] remove rename_after arg in map_domain, which is no longer necessary due to previous map machinery update --- test/test_transform.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/test/test_transform.py b/test/test_transform.py index 6d6ad6e02..feda064e3 100644 --- a/test/test_transform.py +++ b/test/test_transform.py @@ -623,14 +623,13 @@ def test_map_domain_vs_split_iname(): # Create map_domain mapping: import islpy as isl transform_map = isl.BasicMap( - "[nx,nt] -> {[x, t] -> [x_, t_outer, t_inner]: " - "x = x_ and " + "[nt] -> {[t] -> [t_outer, t_inner]: " "0 <= t_inner < 32 and " "32*t_outer + t_inner = t and " "0 <= 32*t_outer + t_inner < nt}") # Call map_domain to transform kernel - knl_map_dom = lp.map_domain(knl_map_dom, transform_map, rename_after={"x_": "x"}) + knl_map_dom = lp.map_domain(knl_map_dom, transform_map) # Prioritize loops (prio should eventually be updated in map_domain?) 
knl_map_dom = lp.prioritize_loops(knl_map_dom, "x, t_outer, t_inner") From dc634d38fda0546df9ec7dcef2533e96261d6137 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Wed, 21 Jul 2021 14:34:37 -0500 Subject: [PATCH 452/460] disable new dep updating during precompute --- loopy/transform/precompute.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/loopy/transform/precompute.py b/loopy/transform/precompute.py index f3ab5ba36..6150f6e30 100644 --- a/loopy/transform/precompute.py +++ b/loopy/transform/precompute.py @@ -1065,7 +1065,8 @@ def add_assumptions(d): kernel = assign_automatic_axes(kernel, callables_table) # {{{ update dependencies - + # FIXME Handle deps in precompute + """ # Get some values that will be useful later fetch_stmt_id = compute_insn_id fetch_stmt = kernel.id_to_insn[compute_insn_id] @@ -1123,7 +1124,7 @@ def add_assumptions(d): set(usage_inames)) non_shared_inames = set(usage_inames) - shared_inames - # Remove the inames from old out dims that will not appear in new out dims + # Remove inames from old out dims that won't appear in new out dims for non_shared_iname in non_shared_inames: new_dep = remove_dim_by_name(new_dep, dt.out, non_shared_iname) @@ -1171,6 +1172,7 @@ def add_assumptions(d): kernel = lp.add_dependency_v2( kernel, usage_stmt_id, fetch_stmt_id, dep_usage_on_fetch) + """ # }}} return kernel From 248f62275e75259f076e2a19f2eb211ab45fdf63 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Wed, 21 Jul 2021 14:35:00 -0500 Subject: [PATCH 453/460] disable test for new dep updating during precompute --- test/test_linearization_checker.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/test/test_linearization_checker.py b/test/test_linearization_checker.py index 0c544a069..3ed785fe5 100644 --- a/test/test_linearization_checker.py +++ b/test/test_linearization_checker.py @@ -2753,6 +2753,10 @@ def test_map_domain_with_stencil_dependencies(): # {{{ test_add_prefetch_with_dependencies +# FIXME handle deps during 
prefetch + +''' + def test_add_prefetch_with_dependencies(): lp.set_caching_enabled(False) @@ -2816,6 +2820,8 @@ def test_add_prefetch_with_dependencies(): assert not unsatisfied_deps +''' + # }}} # }}} From 3c5274a51af38e9df013e54a5d5ab7a478edd6af Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Wed, 21 Jul 2021 14:36:39 -0500 Subject: [PATCH 454/460] remove unnessary kernel names --- test/test_linearization_checker.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/test/test_linearization_checker.py b/test/test_linearization_checker.py index 3ed785fe5..76a5498f1 100644 --- a/test/test_linearization_checker.py +++ b/test/test_linearization_checker.py @@ -2261,7 +2261,6 @@ def test_split_iname_with_dependencies(): a[i] = 0.1 {id=stmt0} b[i] = a[i] {id=stmt1,dep=stmt0} """, - name="example", assumptions="p >= 1", lang_version=(2018, 2) ) @@ -2386,7 +2385,6 @@ def test_split_iname_with_dependencies(): c[i,k,j,m] = 0.1 {id=stmt2} d[i,k,j,m] = c[i,k,j,m] {id=stmt3,dep=stmt2} """, - name="example", assumptions="p >= 1", lang_version=(2018, 2) ) @@ -2460,7 +2458,6 @@ def test_map_domain_with_only_partial_dep_pair_affected(): c[x,t] = d[x,t] {id=stmtc,dep=stmta} e[i] = f[i] {id=stmte,dep=stmtc} """, - name="wave_equation", lang_version=(2018, 2), ) knl = lp.add_and_infer_dtypes(knl, {"b,d,f": np.float32}) @@ -2651,7 +2648,6 @@ def test_map_domain_with_stencil_dependencies(): + dt**2/dx**2 * (u[ix+1, it+1] - 2*u[ix, it+1] + u[ix-1, it+1]) - u[ix, it]) {id=stmt} """, - name="wave_equation", #assumptions="nx,nt >= 3", # works without these (?) 
lang_version=(2018, 2), ) @@ -2767,7 +2763,6 @@ def test_add_prefetch_with_dependencies(): a[i+1,j+1,k+1,m+1] = a[i,j,k,m] {id=stmt} end """, - name="example", assumptions="p >= 1", lang_version=(2018, 2) ) From 19194991667c2859f55c19001c9aa637257fb657 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Wed, 21 Jul 2021 14:39:33 -0500 Subject: [PATCH 455/460] eliminate no-longer-necessary rename_after arg in map_domain --- test/test_linearization_checker.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/test/test_linearization_checker.py b/test/test_linearization_checker.py index 76a5498f1..d9324f12b 100644 --- a/test/test_linearization_checker.py +++ b/test/test_linearization_checker.py @@ -2494,14 +2494,13 @@ def test_map_domain_with_only_partial_dep_pair_affected(): # Create map_domain mapping: import islpy as isl transform_map = isl.BasicMap( - "[nx,nt] -> {[x, t] -> [x_, t_outer, t_inner]: " - "x = x_ and " + "[nt] -> {[t] -> [t_outer, t_inner]: " "0 <= t_inner < 32 and " "32*t_outer + t_inner = t and " "0 <= 32*t_outer + t_inner < nt}") # Call map_domain to transform kernel - knl = lp.map_domain(knl, transform_map, rename_after={"x_": "x"}) + knl = lp.map_domain(knl, transform_map) # Prioritize loops (prio should eventually be updated in map_domain?) knl = lp.prioritize_loops(knl, "x, t_outer, t_inner") From d94cfa44da671784b9e71c6514429d24fc05f43d Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Mon, 26 Jul 2021 17:57:44 -0500 Subject: [PATCH 456/460] for some reason, I need to commit 'changes' to the submodule (but didn't actually do anything to submodule...?) 
--- loopy/target/c/compyte | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/loopy/target/c/compyte b/loopy/target/c/compyte index 7e48e1166..71bffa1ae 160000 --- a/loopy/target/c/compyte +++ b/loopy/target/c/compyte @@ -1 +1 @@ -Subproject commit 7e48e1166a13cfbb7b60f909b071f088034ffda1 +Subproject commit 71bffa1ae64ed98b9d922c79a6f9cc7eb4fd642f From 47ab67a8fb68fe5ec3cbed5f2a1f33307569f533 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Mon, 26 Jul 2021 18:25:39 -0500 Subject: [PATCH 457/460] pass kernel['loopy_kernel'] to filter_deps_by_intersection_with_SAME() --- test/test_linearization_checker.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/test/test_linearization_checker.py b/test/test_linearization_checker.py index 08f12b67a..8d96e09a0 100644 --- a/test/test_linearization_checker.py +++ b/test/test_linearization_checker.py @@ -2893,7 +2893,8 @@ def _dep_with_condition(stmt_before, stmt_after, cond): from loopy.schedule.checker.dependency import ( filter_deps_by_intersection_with_SAME, ) - filtered_depends_on_dict = filter_deps_by_intersection_with_SAME(knl) + filtered_depends_on_dict = filter_deps_by_intersection_with_SAME( + knl["loopy_kernel"]) # Make sure filtered edges are correct @@ -2973,7 +2974,8 @@ def _dep_with_condition(stmt_before, stmt_after, cond): from loopy.schedule.checker.dependency import ( filter_deps_by_intersection_with_SAME, ) - filtered_depends_on_dict = filter_deps_by_intersection_with_SAME(knl) + filtered_depends_on_dict = filter_deps_by_intersection_with_SAME( + knl["loopy_kernel"]) # Make sure filtered edges are correct From 2f7c583ca17dcd4107475e2dec6730a5cf7a239f Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Mon, 26 Jul 2021 19:00:26 -0500 Subject: [PATCH 458/460] add @for_each_kernel to constrain_loop_nesting --- loopy/transform/iname.py | 1 + 1 file changed, 1 insertion(+) diff --git a/loopy/transform/iname.py b/loopy/transform/iname.py index 7a56e9679..16847a3b6 100644 --- 
a/loopy/transform/iname.py +++ b/loopy/transform/iname.py @@ -377,6 +377,7 @@ def _process_iname_set_str(iname_set_str): # {{{ constrain_loop_nesting +@for_each_kernel def constrain_loop_nesting( kernel, must_nest=None, must_not_nest=None): r"""Add the provided constraints to the kernel. From 5fe7e52225e5c48a8225e147051775bffd56145a Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Mon, 26 Jul 2021 19:01:00 -0500 Subject: [PATCH 459/460] fixes after kernel callables update --- test/test_nest_constraints.py | 145 +++++++++++++++++++--------------- 1 file changed, 82 insertions(+), 63 deletions(-) diff --git a/test/test_nest_constraints.py b/test/test_nest_constraints.py index 4f00dbac8..63913b1b4 100644 --- a/test/test_nest_constraints.py +++ b/test/test_nest_constraints.py @@ -24,6 +24,7 @@ import loopy as lp import numpy as np import pyopencl as cl +from loopy import preprocess_kernel, get_one_linearized_kernel import logging logger = logging.getLogger(__name__) @@ -47,6 +48,24 @@ from loopy.version import LOOPY_USE_LANGUAGE_VERSION_2018_2 # noqa +# {{{ Helper functions + +def _process_and_linearize(prog, knl_name="loopy_kernel"): + # Return linearized kernel + proc_prog = preprocess_kernel(prog) + lin_prog = get_one_linearized_kernel( + proc_prog[knl_name], proc_prog.callables_table) + return lin_prog + + +def _linearize_and_get_nestings(prog, knl_name="loopy_kernel"): + from loopy.transform.iname import get_iname_nestings + lin_knl = _process_and_linearize(prog, knl_name) + return get_iname_nestings(lin_knl.linearization) + +# }}} + + # {{{ test_loop_constraint_string_parsing def test_loop_constraint_string_parsing(): @@ -58,80 +77,80 @@ def test_loop_constraint_string_parsing(): try: lp.constrain_loop_nesting(ref_knl, "{g,h,k},{j,i}") - assert False + raise AssertionError() except ValueError as e: assert "Unrecognized character(s)" in str(e) try: lp.constrain_loop_nesting(ref_knl, "{g,h,i,k},{j}") - assert False + raise AssertionError() except ValueError as e: 
assert "Unrecognized character(s)" in str(e) try: lp.constrain_loop_nesting(ref_knl, "{g,{h,i,k}") - assert False + raise AssertionError() except ValueError as e: assert "Unrecognized character(s)" in str(e) try: lp.constrain_loop_nesting(ref_knl, "{g,~h,i,k}") - assert False + raise AssertionError() except ValueError as e: assert "Unrecognized character(s)" in str(e) try: lp.constrain_loop_nesting(ref_knl, "{g,#h,i,k}") - assert False + raise AssertionError() except ValueError as e: assert "Unrecognized character(s)" in str(e) try: lp.constrain_loop_nesting(ref_knl, ("{g,{h}", "i,k")) - assert False + raise AssertionError() except ValueError as e: assert "Unrecognized character(s)" in str(e) try: lp.constrain_loop_nesting(ref_knl, ("{g,~h}", "i,k")) - assert False + raise AssertionError() except ValueError as e: assert "Unrecognized character(s)" in str(e) try: lp.constrain_loop_nesting(ref_knl, ("k", "~{g,h}", "{g,h}")) - assert False + raise AssertionError() except ValueError as e: assert "Complement (~) not allowed" in str(e) try: lp.constrain_loop_nesting(ref_knl, ("k", "{i,j,k}", "{g,h}")) - assert False + raise AssertionError() except ValueError as e: assert "contains cycle" in str(e) try: lp.constrain_loop_nesting(ref_knl, must_not_nest=("~j,i", "{j,i}")) - assert False + raise AssertionError() except ValueError as e: assert ("Complements of sets containing multiple inames " "must enclose inames in braces") in str(e) try: lp.constrain_loop_nesting(ref_knl, must_nest=("k", "{h}", "{j,i,}")) - assert False + raise AssertionError() except ValueError as e: assert ("Found 2 inames but expected 3") in str(e) try: lp.constrain_loop_nesting(ref_knl, must_nest=("k", "{h}", "{j, x x, i}")) - assert False + raise AssertionError() except ValueError as e: assert ("Found 4 inames but expected 3") in str(e) try: lp.constrain_loop_nesting(ref_knl, must_nest="{h}}") - assert False + raise AssertionError() except ValueError as e: assert ( "Unrecognized character(s) ['{', 
'}', '}'] in nest string {h}}" @@ -139,7 +158,7 @@ def test_loop_constraint_string_parsing(): try: lp.constrain_loop_nesting(ref_knl, must_nest="{h i j,,}") - assert False + raise AssertionError() except ValueError as e: assert( "Unrecognized character(s) [\'{\', \'}\'] in nest string {h i j,,}" @@ -147,7 +166,7 @@ def test_loop_constraint_string_parsing(): try: lp.constrain_loop_nesting(ref_knl, must_nest=("{h}}", "i")) - assert False + raise AssertionError() except ValueError as e: assert ( "Unrecognized character(s) [\'}\'] in nest string h}" @@ -163,20 +182,21 @@ def test_loop_constraint_string_parsing(): lp.constrain_loop_nesting(ref_knl, must_nest="k,h,j") # Handling spaces - knl = lp.constrain_loop_nesting(ref_knl, must_nest=("k", "{h }", " { j , i } ")) + knl = lp.constrain_loop_nesting( + ref_knl, must_nest=("k", "{h }", " { j , i } "))["loopy_kernel"] assert list(knl.loop_nest_constraints.must_nest)[0][0].inames == set("k") assert list(knl.loop_nest_constraints.must_nest)[0][1].inames == set("h") assert list(knl.loop_nest_constraints.must_nest)[0][2].inames == set(["j", "i"]) try: knl = lp.constrain_loop_nesting(ref_knl, ("j", "{}")) - assert False + raise AssertionError() except ValueError as e: assert "Found 0 inames" in str(e) try: knl = lp.constrain_loop_nesting(ref_knl, ("j", "")) - assert False + raise AssertionError() except ValueError as e: assert "Found 0 inames" in str(e) @@ -302,7 +322,7 @@ def test_adding_multiple_nest_constraints_to_knl(): knl = lp.constrain_loop_nesting( knl, must_nest=("x", "y")) - must_nest_knl = knl.loop_nest_constraints.must_nest + must_nest_knl = knl["loopy_kernel"].loop_nest_constraints.must_nest from loopy.transform.iname import UnexpandedInameSet must_nest_expected = set([ (UnexpandedInameSet(set(["g"], )), UnexpandedInameSet(set(["h", "i"], ))), @@ -315,7 +335,7 @@ def test_adding_multiple_nest_constraints_to_knl(): ]) assert must_nest_knl == must_nest_expected - must_not_nest_knl = 
knl.loop_nest_constraints.must_not_nest + must_not_nest_knl = knl["loopy_kernel"].loop_nest_constraints.must_not_nest must_not_nest_expected = set([ (UnexpandedInameSet(set(["k", "i"], )), UnexpandedInameSet(set(["k", "i"], ), complement=True)), @@ -348,7 +368,7 @@ def test_incompatible_nest_constraints(): try: knl = lp.constrain_loop_nesting( knl, must_nest=("k", "h")) # (should fail) - assert False + raise AssertionError() except ValueError as e: assert "Nest constraint conflict detected" in str(e) @@ -359,7 +379,7 @@ def test_incompatible_nest_constraints(): try: knl = lp.constrain_loop_nesting( knl, must_nest=("j", "g")) # (should fail) - assert False + raise AssertionError() except ValueError as e: assert "Nest constraint cycle detected" in str(e) @@ -407,24 +427,24 @@ def is_innermost(iname, lin_items): knl = ref_knl knl = lp.tag_inames(knl, {"h": "vec"}) - lin_knl = lp.get_one_linearized_kernel(lp.preprocess_kernel(knl)) + lin_knl = _process_and_linearize(knl) assert is_innermost("h", lin_knl.linearization) knl = ref_knl knl = lp.tag_inames(knl, {"h": "vec", "g": "l.1", "i": "l.0"}) - lin_knl = lp.get_one_linearized_kernel(lp.preprocess_kernel(knl)) + lin_knl = _process_and_linearize(knl) assert is_innermost("h", lin_knl.linearization) knl = ref_knl knl = lp.tag_inames( knl, {"h": "vec", "g": "l.1", "i": "l.0", "k": "unr"}) - lin_knl = lp.get_one_linearized_kernel(lp.preprocess_kernel(knl)) + lin_knl = _process_and_linearize(knl) assert is_innermost("h", lin_knl.linearization) knl = ref_knl knl = lp.tag_inames(knl, {"h": "vec"}) knl = lp.constrain_loop_nesting(knl, must_nest=("k", "i")) - lin_knl = lp.get_one_linearized_kernel(lp.preprocess_kernel(knl)) + lin_knl = _process_and_linearize(knl) assert is_innermost("h", lin_knl.linearization) lp.set_caching_enabled(True) @@ -433,7 +453,7 @@ def is_innermost(iname, lin_items): knl = lp.tag_inames(knl, {"h": "vec"}) try: lp.constrain_loop_nesting(knl, must_nest=("{g,h,i,j}", "{k}")) - assert False + raise 
AssertionError() except ValueError as e: assert ( "iname h tagged with ConcurrentTag, " @@ -446,7 +466,7 @@ def is_innermost(iname, lin_items): knl = lp.constrain_loop_nesting(knl, must_nest=("{g,h,i,j}", "{k}")) try: lp.tag_inames(knl, {"h": "vec"}) - assert False + raise AssertionError() except ValueError as e: assert ( "cannot tag 'h' as concurrent--iname involved " @@ -487,7 +507,7 @@ def loop_order(lin_items): knl, must_nest=("i", "j", "h", "k", "g"), ) - lin_knl = lp.get_one_linearized_kernel(lp.preprocess_kernel(knl)) + lin_knl = _process_and_linearize(knl) assert loop_order(lin_knl.linearization) == ["i", "j", "h", "k", "g"] knl = ref_knl @@ -495,7 +515,7 @@ def loop_order(lin_items): knl, must_nest=("k", "{g, h, i, j}"), ) - lin_knl = lp.get_one_linearized_kernel(lp.preprocess_kernel(knl)) + lin_knl = _process_and_linearize(knl) assert loop_order(lin_knl.linearization)[0] == "k" knl = ref_knl @@ -503,7 +523,7 @@ def loop_order(lin_items): knl, must_nest=("{g, h, i, j}", "k"), ) - lin_knl = lp.get_one_linearized_kernel(lp.preprocess_kernel(knl)) + lin_knl = _process_and_linearize(knl) assert loop_order(lin_knl.linearization)[-1] == "k" knl = ref_knl @@ -511,7 +531,7 @@ def loop_order(lin_items): knl, must_nest=("{g, h, i}", "{j, k}"), ) - lin_knl = lp.get_one_linearized_kernel(lp.preprocess_kernel(knl)) + lin_knl = _process_and_linearize(knl) assert set(loop_order(lin_knl.linearization)[-2:]) == set(["j", "k"]) knl = ref_knl @@ -523,7 +543,7 @@ def loop_order(lin_items): knl, must_nest=("i", "{g, h}"), ) - lin_knl = lp.get_one_linearized_kernel(lp.preprocess_kernel(knl)) + lin_knl = _process_and_linearize(knl) assert set(loop_order(lin_knl.linearization)[3:]) == set(["j", "k"]) assert set(loop_order(lin_knl.linearization)[1:3]) == set(["g", "h"]) assert loop_order(lin_knl.linearization)[0] == "i" @@ -533,7 +553,7 @@ def loop_order(lin_items): knl, must_nest=("i", "{g, h}", "{j, k}"), ) - lin_knl = lp.get_one_linearized_kernel(lp.preprocess_kernel(knl)) + 
lin_knl = _process_and_linearize(knl) assert set(loop_order(lin_knl.linearization)[3:]) == set(["j", "k"]) assert set(loop_order(lin_knl.linearization)[1:3]) == set(["g", "h"]) assert loop_order(lin_knl.linearization)[0] == "i" @@ -545,7 +565,7 @@ def loop_order(lin_items): knl, must_not_nest=("~k", "k"), ) - lin_knl = lp.get_one_linearized_kernel(lp.preprocess_kernel(knl)) + lin_knl = _process_and_linearize(knl) assert loop_order(lin_knl.linearization)[0] == "k" knl = ref_knl @@ -553,7 +573,7 @@ def loop_order(lin_items): knl, must_not_nest=("k", "~k"), ) - lin_knl = lp.get_one_linearized_kernel(lp.preprocess_kernel(knl)) + lin_knl = _process_and_linearize(knl) assert loop_order(lin_knl.linearization)[-1] == "k" knl = ref_knl @@ -561,7 +581,7 @@ def loop_order(lin_items): knl, must_not_nest=("{j, k}", "~{j, k}"), ) - lin_knl = lp.get_one_linearized_kernel(lp.preprocess_kernel(knl)) + lin_knl = _process_and_linearize(knl) assert set(loop_order(lin_knl.linearization)[-2:]) == set(["j", "k"]) knl = ref_knl @@ -573,7 +593,7 @@ def loop_order(lin_items): knl, must_nest=("i", "{g, h}"), ) - lin_knl = lp.get_one_linearized_kernel(lp.preprocess_kernel(knl)) + lin_knl = _process_and_linearize(knl) assert set(loop_order(lin_knl.linearization)[3:]) == set(["j", "k"]) assert set(loop_order(lin_knl.linearization)[1:3]) == set(["g", "h"]) assert loop_order(lin_knl.linearization)[0] == "i" @@ -585,7 +605,7 @@ def loop_order(lin_items): must_nest=("{g, h, i}", "{j, k}"), must_not_nest=("i", "{g, h}"), ) - lin_knl = lp.get_one_linearized_kernel(lp.preprocess_kernel(knl)) + lin_knl = _process_and_linearize(knl) assert set(loop_order(lin_knl.linearization)[3:]) == set(["j", "k"]) assert set(loop_order(lin_knl.linearization)[0:2]) == set(["g", "h"]) assert loop_order(lin_knl.linearization)[2] == "i" @@ -595,7 +615,7 @@ def loop_order(lin_items): knl, must_not_nest=("i", "~i"), ) - lin_knl = lp.get_one_linearized_kernel(lp.preprocess_kernel(knl)) + lin_knl = 
_process_and_linearize(knl) assert loop_order(lin_knl.linearization)[-1] == "i" # contradictory must_not_nest @@ -611,11 +631,13 @@ def loop_order(lin_items): ) try: - lp.get_one_linearized_kernel( - lp.preprocess_kernel(knl), + proc_prog = preprocess_kernel(knl) + get_one_linearized_kernel( + proc_prog["loopy_kernel"], + proc_prog.callables_table, debug_args={"interactive": False}, ) - assert False + raise AssertionError() except RuntimeError as e: assert "no valid schedules found" in str(e) @@ -625,17 +647,6 @@ def loop_order(lin_items): # {{{ test constraint updating during transformation -# {{{ helper functions - -def _linearize_and_get_nestings(unlinearized_knl): - from loopy.transform.iname import get_iname_nestings - lin_knl = lp.get_one_linearized_kernel( - lp.preprocess_kernel(unlinearized_knl)) - return get_iname_nestings(lin_knl.linearization) - -# }}} - - # {{{ test_constraint_updating_split_iname def test_constraint_updating_split_iname(): @@ -770,7 +781,8 @@ def test_constraint_updating_duplicate_inames(): (iname, set()) for iname in ["g", "h", "j", "k", "gg", "hh"]]) must_nest_graph_exp["i"] = set(["g", "h", "j", "k", "gg", "hh"]) - assert knl.loop_nest_constraints.must_nest_graph == must_nest_graph_exp + assert knl[ + "loopy_kernel"].loop_nest_constraints.must_nest_graph == must_nest_graph_exp nesting_for_insn, nesting_for_insn0 = _linearize_and_get_nestings(knl) @@ -802,7 +814,8 @@ def test_constraint_updating_duplicate_inames(): (iname, set()) for iname in ["j", "k", "gg", "hh"]]) must_nest_graph_exp["i"] = set(["j", "k", "gg", "hh"]) - assert knl.loop_nest_constraints.must_nest_graph == must_nest_graph_exp + assert knl[ + "loopy_kernel"].loop_nest_constraints.must_nest_graph == must_nest_graph_exp loop_nestings = _linearize_and_get_nestings(knl) assert len(loop_nestings) == 1 @@ -891,7 +904,7 @@ def test_constraint_handling_tag_inames(): ) try: lp.tag_inames(knl, {"i": "l.0"}) - assert False + raise AssertionError() except ValueError as e: assert 
( "cannot tag 'i' as concurrent--iname involved in must-nest constraint" @@ -965,7 +978,7 @@ def test_constraint_updating_join_inames(): ) try: lp.join_inames(knl, inames=["i", "k"], new_iname="ik") - assert False + raise AssertionError() except ValueError as e: assert "cycle" in str(e) @@ -977,7 +990,7 @@ def test_constraint_updating_join_inames(): ) try: lp.join_inames(knl, inames=["i", "k"], new_iname="ik") - assert False + raise AssertionError() except ValueError as e: assert "Implied nestings violate existing must-not-nest" in str(e) @@ -1012,6 +1025,7 @@ def get_sets_of_inames(iname_sets_tuple, iname_universe): must_nest=("i", "g", "h", "j", "k"), ) knl = lp.join_inames(knl, inames=["g", "h"], new_iname="gh") + knl = knl["loopy_kernel"] new_must_nest = get_sets_of_inames( list(knl.loop_nest_constraints.must_nest)[0], knl.all_inames()) expected_must_nest = [ @@ -1024,6 +1038,7 @@ def get_sets_of_inames(iname_sets_tuple, iname_universe): must_nest=("{i, g}", "h", "j", "k"), ) knl = lp.join_inames(knl, inames=["g", "h"], new_iname="gh") + knl = knl["loopy_kernel"] new_must_nest = get_sets_of_inames( list(knl.loop_nest_constraints.must_nest)[0], knl.all_inames()) expected_must_nest = [ @@ -1036,6 +1051,7 @@ def get_sets_of_inames(iname_sets_tuple, iname_universe): must_nest=("i", "g", "{h, j}", "k"), ) knl = lp.join_inames(knl, inames=["g", "h"], new_iname="gh") + knl = knl["loopy_kernel"] new_must_nest = get_sets_of_inames( list(knl.loop_nest_constraints.must_nest)[0], knl.all_inames()) expected_must_nest = [ @@ -1048,6 +1064,7 @@ def get_sets_of_inames(iname_sets_tuple, iname_universe): must_nest=("i", "g", "{h, j, k}"), ) knl = lp.join_inames(knl, inames=["g", "h"], new_iname="gh") + knl = knl["loopy_kernel"] new_must_nest = get_sets_of_inames( list(knl.loop_nest_constraints.must_nest)[0], knl.all_inames()) expected_must_nest = [ @@ -1060,6 +1077,7 @@ def get_sets_of_inames(iname_sets_tuple, iname_universe): must_nest=("i", "{g, h}", "j", "k"), ) knl = 
lp.join_inames(knl, inames=["g", "h"], new_iname="gh") + knl = knl["loopy_kernel"] new_must_nest = get_sets_of_inames( list(knl.loop_nest_constraints.must_nest)[0], knl.all_inames()) expected_must_nest = [ @@ -1072,6 +1090,7 @@ def get_sets_of_inames(iname_sets_tuple, iname_universe): must_nest=("{i, g}", "{h, j, k}"), ) knl = lp.join_inames(knl, inames=["g", "h"], new_iname="gh") + knl = knl["loopy_kernel"] new_must_nest = get_sets_of_inames( list(knl.loop_nest_constraints.must_nest)[0], knl.all_inames()) expected_must_nest = [ @@ -1085,7 +1104,7 @@ def get_sets_of_inames(iname_sets_tuple, iname_universe): ) try: knl = lp.join_inames(knl, inames=["g", "h"], new_iname="gh") - assert False + raise AssertionError() except ValueError as e: assert "contains cycle" in str(e) @@ -1096,7 +1115,7 @@ def get_sets_of_inames(iname_sets_tuple, iname_universe): ) try: knl = lp.join_inames(knl, inames=["g", "h"], new_iname="gh") - assert False + raise AssertionError() except ValueError as e: assert "contains cycle" in str(e) @@ -1107,7 +1126,7 @@ def get_sets_of_inames(iname_sets_tuple, iname_universe): ) try: knl = lp.join_inames(knl, inames=["g", "h"], new_iname="gh") - assert False + raise AssertionError() except ValueError as e: assert "nestings violate existing must-nest" in str(e) @@ -1118,7 +1137,7 @@ def get_sets_of_inames(iname_sets_tuple, iname_universe): ) try: knl = lp.join_inames(knl, inames=["g", "h"], new_iname="gh") - assert False + raise AssertionError() except ValueError as e: assert "nestings violate existing must-not-nest" in str(e) From 2f5e5c0a874766968869efb3c3c7691d97f437ff Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Mon, 26 Jul 2021 19:02:31 -0500 Subject: [PATCH 460/460] add TODO --- test/test_nest_constraints.py | 1 + 1 file changed, 1 insertion(+) diff --git a/test/test_nest_constraints.py b/test/test_nest_constraints.py index 63913b1b4..a931e9e72 100644 --- a/test/test_nest_constraints.py +++ b/test/test_nest_constraints.py @@ -954,6 +954,7 @@ def 
test_constraint_updating_join_inames(): must_nest=("{g, h, i}", "{j, k}"), ) knl = lp.join_inames(knl, inames=["j", "k"], new_iname="jk") + # TODO figure out reason for jk key error loop_nesting = _linearize_and_get_nestings(knl)[0] # only one nesting assert loop_nesting[0] == "i" assert loop_nesting[1:3] == ("g", "h")