From f69cae9e6c18ddcbc79d3acd7b641232775337f0 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Fri, 23 Apr 2021 20:40:43 -0500 Subject: [PATCH 01/22] create convert_set_back_to_map() --- loopy/schedule/checker/utils.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/loopy/schedule/checker/utils.py b/loopy/schedule/checker/utils.py index b4ff9636d..f077b875c 100644 --- a/loopy/schedule/checker/utils.py +++ b/loopy/schedule/checker/utils.py @@ -164,6 +164,7 @@ def sorted_union_of_names_in_isl_sets( def convert_map_to_set(isl_map): + # also works for spaces n_in_dims = len(isl_map.get_var_names(dt.in_)) n_out_dims = len(isl_map.get_var_names(dt.out)) return isl_map.move_dims( @@ -171,6 +172,11 @@ def convert_map_to_set(isl_map): ).domain(), n_in_dims, n_out_dims +def convert_set_back_to_map(isl_set, n_old_in_dims, n_old_out_dims): + return isl.Map.from_domain( + isl_set).move_dims(dt.out, 0, dt.in_, n_old_in_dims, n_old_out_dims) + + def create_symbolic_map_from_tuples( tuple_pairs_with_domains, space, From 94c374efba3023f3c3f9531832309f8287c4fd48 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Fri, 23 Apr 2021 20:42:14 -0500 Subject: [PATCH 02/22] create dependency.py; create function filter_deps_by_intersection_with_SAME() based on simplification of work from old linearization checker branch(es) --- loopy/schedule/checker/dependency.py | 122 +++++++++++++++++++++++++++ 1 file changed, 122 insertions(+) create mode 100644 loopy/schedule/checker/dependency.py diff --git a/loopy/schedule/checker/dependency.py b/loopy/schedule/checker/dependency.py new file mode 100644 index 000000000..a0b26c109 --- /dev/null +++ b/loopy/schedule/checker/dependency.py @@ -0,0 +1,122 @@ +__copyright__ = "Copyright (C) 2019 James Stevens" + +__license__ = """ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the 
rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +""" + +import islpy as isl + + +class DependencyType: + """Strings specifying a particular type of dependency relationship. + + .. attribute:: SAME + + A :class:`str` specifying the following dependency relationship: + + If ``S = {i, j, ...}`` is a set of inames used in both statements + ``insn0`` and ``insn1``, and ``{i', j', ...}`` represent the values + of the inames in ``insn0``, and ``{i, j, ...}`` represent the + values of the inames in ``insn1``, then the dependency + ``insn0 happens before insn1 iff SAME({i, j})`` specifies that + ``insn0 happens before insn1 iff {i' = i and j' = j and ...}``. + Note that ``SAME({}) = True``. + + .. attribute:: PRIOR + + A :class:`str` specifying the following dependency relationship: + + If ``S = {i, j, k, ...}`` is a set of inames used in both statements + ``insn0`` and ``insn1``, and ``{i', j', k', ...}`` represent the values + of the inames in ``insn0``, and ``{i, j, k, ...}`` represent the + values of the inames in ``insn1``, then the dependency + ``insn0 happens before insn1 iff PRIOR({i, j, k})`` specifies one of + two possibilities, depending on whether the loop nest ordering is + known. 
If the loop nest ordering is unknown, then + ``insn0 happens before insn1 iff {i' < i and j' < j and k' < k ...}``. + If the loop nest ordering is known, the condition becomes + ``{i', j', k', ...}`` is lexicographically less than ``{i, j, k, ...}``, + i.e., ``i' < i or (i' = i and j' < j) or (i' = i and j' = j and k' < k) ...``. + + """ + + SAME = "same" + PRIOR = "prior" + + +def filter_deps_by_intersection_with_SAME(knl): + # Determine which dep relations have a non-empty intersection with + # the SAME relation + # TODO document + + from loopy.schedule.checker.utils import ( + append_mark_to_strings, + partition_inames_by_concurrency, + create_elementwise_comparison_conjunction_set, + convert_map_to_set, + convert_set_back_to_map, + ) + from loopy.schedule.checker.schedule import ( + BEFORE_MARK, + ) + _, non_conc_inames = partition_inames_by_concurrency(knl) + + deps_filtered = set() + for stmt in knl.instructions: + if hasattr(stmt, 'dependencies') and stmt.dependencies: + depender_id = stmt.id + for dependee_id, dep_maps in stmt.dependencies: + # Continue if we already have this pair + if (dependee_id, depender_id) in deps_filtered: + continue + for dep_map in dep_maps: + # Create isl map representing "SAME" dep for these two insns + + # Get shared nonconcurrent inames + depender_inames = knl.id_to_insn[depender_id].within_inames + dependee_inames = knl.id_to_insn[dependee_id].within_inames + shared_nc_inames = ( + depender_inames & dependee_inames & non_conc_inames) + + # Temporarily convert to set + dep_set_space, n_in_dims, n_out_dims = convert_map_to_set( + dep_map.space) + + # Create SAME relation + same_set_affs = isl.affs_from_space(dep_set_space) + same_set = create_elementwise_comparison_conjunction_set( + shared_nc_inames, + append_mark_to_strings(shared_nc_inames, BEFORE_MARK), + same_set_affs) + + # Convert back to map + same_map = convert_set_back_to_map( + same_set, n_in_dims, n_out_dims) + + # Don't need to intersect same_map with iname bounds 
(I think..?) + + # See whether the intersection of dep map and SAME is empty + intersect_dep_and_same = same_map & dep_map + intersect_not_empty = not bool(intersect_dep_and_same.is_empty()) + + if intersect_not_empty: + deps_filtered.append((dependee_id, depender_id)) + break # No need to check any more deps for this pair + + return deps_filtered From 2ed669d01f9d34c45710b087fe23027f872eabc0 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Fri, 23 Apr 2021 20:46:40 -0500 Subject: [PATCH 03/22] fix flake8 issue --- loopy/schedule/checker/dependency.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/loopy/schedule/checker/dependency.py b/loopy/schedule/checker/dependency.py index a0b26c109..37acc1664 100644 --- a/loopy/schedule/checker/dependency.py +++ b/loopy/schedule/checker/dependency.py @@ -79,7 +79,7 @@ def filter_deps_by_intersection_with_SAME(knl): deps_filtered = set() for stmt in knl.instructions: - if hasattr(stmt, 'dependencies') and stmt.dependencies: + if hasattr(stmt, "dependencies") and stmt.dependencies: depender_id = stmt.id for dependee_id, dep_maps in stmt.dependencies: # Continue if we already have this pair From 196e17e302333459aeb15ceda058663537fd5471 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Fri, 23 Apr 2021 20:47:31 -0500 Subject: [PATCH 04/22] (WIP) start working on cartoon dag creation --- loopy/schedule/__init__.py | 24 +++++++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) diff --git a/loopy/schedule/__init__.py b/loopy/schedule/__init__.py index c6a9ec3ac..7a2c9b80e 100644 --- a/loopy/schedule/__init__.py +++ b/loopy/schedule/__init__.py @@ -1984,13 +1984,35 @@ def generate_loop_schedules_inner(kernel, debug_args={}): loop_nest_with_map = find_loop_nest_with_map(kernel) loop_nest_around_map = find_loop_nest_around_map(kernel) + + # {{{ create dependency graph with edges from depender* to dependee* + # iff intersection (SAME_map & DEP_map) is not empty + + from loopy.schedule.checker.dependency import 
( + filter_deps_by_intersection_with_SAME, + ) + from loopy.schedule.checker.utils import ( + create_graph_from_pairs, + ) + + # Get dep graph edges with edges from depender->dependee + dep_graph_pairs = filter_deps_by_intersection_with_SAME(kernel) + + # Create dep graph from edges + insn_depends_on_graph = create_graph_from_pairs(dep_graph_pairs) + # TODO create ^this func + + # }}} + sched_state = SchedulerState( kernel=kernel, loop_nest_around_map=loop_nest_around_map, loop_insn_dep_map=find_loop_insn_dep_map( kernel, loop_nest_with_map=loop_nest_with_map, - loop_nest_around_map=loop_nest_around_map), + loop_nest_around_map=loop_nest_around_map, + insn_depends_on_graph=insn_depends_on_graph), # TODO deal with this + insn_depends_on_graph=insn_depends_on_graph, # TODO deal with this breakable_inames=ilp_inames, ilp_inames=ilp_inames, vec_inames=vec_inames, From fd754fa89f2cf583cd51b3f9d0f05bb74145d605 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Sat, 24 Apr 2021 01:22:23 -0500 Subject: [PATCH 05/22] fix minor bugs --- loopy/schedule/checker/dependency.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/loopy/schedule/checker/dependency.py b/loopy/schedule/checker/dependency.py index 37acc1664..1aea2520d 100644 --- a/loopy/schedule/checker/dependency.py +++ b/loopy/schedule/checker/dependency.py @@ -81,7 +81,7 @@ def filter_deps_by_intersection_with_SAME(knl): for stmt in knl.instructions: if hasattr(stmt, "dependencies") and stmt.dependencies: depender_id = stmt.id - for dependee_id, dep_maps in stmt.dependencies: + for dependee_id, dep_maps in stmt.dependencies.items(): # Continue if we already have this pair if (dependee_id, depender_id) in deps_filtered: continue @@ -116,7 +116,7 @@ def filter_deps_by_intersection_with_SAME(knl): intersect_not_empty = not bool(intersect_dep_and_same.is_empty()) if intersect_not_empty: - deps_filtered.append((dependee_id, depender_id)) + deps_filtered.add((dependee_id, depender_id)) break # No need to 
check any more deps for this pair return deps_filtered From 239b6737acbc8a455e30db60326fc1c9b497950b Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Sat, 24 Apr 2021 01:22:54 -0500 Subject: [PATCH 06/22] add test_filtering_deps_by_same() --- test/test_linearization_checker.py | 79 ++++++++++++++++++++++++++++++ 1 file changed, 79 insertions(+) diff --git a/test/test_linearization_checker.py b/test/test_linearization_checker.py index 5daa9b890..de3a9f416 100644 --- a/test/test_linearization_checker.py +++ b/test/test_linearization_checker.py @@ -2251,6 +2251,85 @@ def test_map_domain_with_stencil_dependencies(): # }}} + +# {{{ Dependency handling during linearization + +# {{{ test_filtering_deps_by_same + +def test_filtering_deps_by_same(): + + # Make a kernel (just need something that can carry deps) + knl = lp.make_kernel( + "{[i,j,k,m] : 0 <= i,j,k,m < n}", + """ + a[i,j,k,m] = 1 {id=s1} + a[i,j,k,m] = 2 {id=s2} + a[i,j,k,m] = 3 {id=s3} + a[i,j,k,m] = 4 {id=s4} + a[i,j,k,m] = 5 {id=s5} + """) + knl = lp.add_and_infer_dtypes(knl, {"a": np.float32}) + knl = lp.tag_inames(knl, "m:l.0") + + # Make some deps + + def _dep_with_condition(cond): + return _isl_map_with_marked_dims( + "[n] -> {{" + "[{0}'=0, i', j', k', m'] -> [{0}=0, i, j, k, m] : " + "0 <= i,j,k,m,i',j',k',m' < n and {1}" + "}}".format(STATEMENT_VAR_NAME, cond)) + + dep_s2_on_s1_1 = _dep_with_condition("i' < i and j' <= j and k' = k and m' < m") + dep_s2_on_s1_2 = _dep_with_condition("i' <= i and j' <= j and k' = k and m' < m") + + dep_s2_on_s2_1 = _dep_with_condition("i' < i and j' <= j and k' = k and m' < m") + dep_s2_on_s2_2 = _dep_with_condition("i' <= i and j' <= j and k' = k and m' < m") + + dep_s3_on_s2_1 = _dep_with_condition("i' < i and j' < j and k' = k and m' < m") + dep_s3_on_s2_2 = _dep_with_condition("i' = i and j' = j and k' < k and m' < m") + + dep_s4_on_s3_1 = _dep_with_condition("i' <= i and j' <= j and k' = k") + dep_s4_on_s3_2 = _dep_with_condition("i' <= i") + + dep_s5_on_s4_1 = 
_dep_with_condition("i' < i") + + knl = lp.add_dependency_v2(knl, "s2", "s1", dep_s2_on_s1_1) + knl = lp.add_dependency_v2(knl, "s2", "s1", dep_s2_on_s1_2) + + knl = lp.add_dependency_v2(knl, "s2", "s2", dep_s2_on_s2_1) + knl = lp.add_dependency_v2(knl, "s2", "s2", dep_s2_on_s2_2) + + knl = lp.add_dependency_v2(knl, "s3", "s2", dep_s3_on_s2_1) + knl = lp.add_dependency_v2(knl, "s3", "s2", dep_s3_on_s2_2) + + knl = lp.add_dependency_v2(knl, "s4", "s3", dep_s4_on_s3_1) + knl = lp.add_dependency_v2(knl, "s4", "s3", dep_s4_on_s3_2) + + knl = lp.add_dependency_v2(knl, "s5", "s4", dep_s5_on_s4_1) + + # Filter deps by intersection with SAME + + from loopy.schedule.checker.dependency import ( + filter_deps_by_intersection_with_SAME, + ) + dep_edges_filtered = filter_deps_by_intersection_with_SAME(knl) + + # Make sure filtered edges are correct + + # (m is concurrent so shouldn't matter) + dep_edges_expected = set([ + ("s1", "s2"), + ("s2", "s2"), + ("s3", "s4"), + ]) + + assert dep_edges_filtered == dep_edges_expected + +# }}} + +# }}} + # }}} From d3c1adcd6ca3948d58afa40ab03e5f27c1ea0608 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Sat, 24 Apr 2021 01:53:09 -0500 Subject: [PATCH 07/22] reverse order of cartoon graph dict so it maps depender->dependee --- loopy/schedule/checker/dependency.py | 13 ++++++++++--- test/test_linearization_checker.py | 18 +++++++++++------- 2 files changed, 21 insertions(+), 10 deletions(-) diff --git a/loopy/schedule/checker/dependency.py b/loopy/schedule/checker/dependency.py index 1aea2520d..97b5321bd 100644 --- a/loopy/schedule/checker/dependency.py +++ b/loopy/schedule/checker/dependency.py @@ -77,14 +77,21 @@ def filter_deps_by_intersection_with_SAME(knl): ) _, non_conc_inames = partition_inames_by_concurrency(knl) - deps_filtered = set() + # NOTE: deps filtered will map depender->dependee + deps_filtered = {} for stmt in knl.instructions: + if hasattr(stmt, "dependencies") and stmt.dependencies: + depender_id = stmt.id + for dependee_id, 
dep_maps in stmt.dependencies.items(): + # Continue if we already have this pair - if (dependee_id, depender_id) in deps_filtered: + if dependee_id in deps_filtered.keys() and ( + depender_id in deps_filtered[dependee_id]): continue + for dep_map in dep_maps: # Create isl map representing "SAME" dep for these two insns @@ -116,7 +123,7 @@ def filter_deps_by_intersection_with_SAME(knl): intersect_not_empty = not bool(intersect_dep_and_same.is_empty()) if intersect_not_empty: - deps_filtered.add((dependee_id, depender_id)) + deps_filtered.setdefault(depender_id, set()).add(dependee_id) break # No need to check any more deps for this pair return deps_filtered diff --git a/test/test_linearization_checker.py b/test/test_linearization_checker.py index de3a9f416..383278326 100644 --- a/test/test_linearization_checker.py +++ b/test/test_linearization_checker.py @@ -2294,6 +2294,8 @@ def _dep_with_condition(cond): dep_s5_on_s4_1 = _dep_with_condition("i' < i") + dep_s5_on_s2_1 = _dep_with_condition("i' = i") + knl = lp.add_dependency_v2(knl, "s2", "s1", dep_s2_on_s1_1) knl = lp.add_dependency_v2(knl, "s2", "s1", dep_s2_on_s1_2) @@ -2308,23 +2310,25 @@ def _dep_with_condition(cond): knl = lp.add_dependency_v2(knl, "s5", "s4", dep_s5_on_s4_1) + knl = lp.add_dependency_v2(knl, "s5", "s2", dep_s5_on_s2_1) + # Filter deps by intersection with SAME from loopy.schedule.checker.dependency import ( filter_deps_by_intersection_with_SAME, ) - dep_edges_filtered = filter_deps_by_intersection_with_SAME(knl) + filtered_depends_on_dict = filter_deps_by_intersection_with_SAME(knl) # Make sure filtered edges are correct # (m is concurrent so shouldn't matter) - dep_edges_expected = set([ - ("s1", "s2"), - ("s2", "s2"), - ("s3", "s4"), - ]) + depends_on_dict_expected = { + "s2": set(["s1", "s2"]), + "s4": set(["s3"]), + "s5": set(["s2"]), + } - assert dep_edges_filtered == dep_edges_expected + assert filtered_depends_on_dict == depends_on_dict_expected # }}} From 
57dc139f2cd461b4ad413a1cff33c84cc44ec720 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Sat, 24 Apr 2021 02:38:33 -0500 Subject: [PATCH 08/22] create new kernel option use_dependencies_v2 --- loopy/options.py | 1 + 1 file changed, 1 insertion(+) diff --git a/loopy/options.py b/loopy/options.py index 45eb3eb63..3742cb27b 100644 --- a/loopy/options.py +++ b/loopy/options.py @@ -231,6 +231,7 @@ def __init__( disable_global_barriers=kwargs.get("disable_global_barriers", False), check_dep_resolution=kwargs.get("check_dep_resolution", True), + use_dependencies_v2=kwargs.get("use_dependencies_v2", False), enforce_variable_access_ordered=kwargs.get( "enforce_variable_access_ordered", True), From 111419af655e4344c7560cab0671504a943fff1e Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Sat, 24 Apr 2021 02:39:11 -0500 Subject: [PATCH 09/22] use new dependencies to create cartoon dep graph for linearization --- loopy/schedule/__init__.py | 93 ++++++++++++++++++++++++++------------ 1 file changed, 65 insertions(+), 28 deletions(-) diff --git a/loopy/schedule/__init__.py b/loopy/schedule/__init__.py index 7a2c9b80e..dc3696923 100644 --- a/loopy/schedule/__init__.py +++ b/loopy/schedule/__init__.py @@ -252,57 +252,94 @@ def find_loop_nest_around_map(kernel): return result -def find_loop_insn_dep_map(kernel, loop_nest_with_map, loop_nest_around_map): +def find_loop_insn_dep_map( + kernel, loop_nest_with_map, loop_nest_around_map, + cartoon_depends_on_dict, use_dependencies_v2=False, + ): """Returns a dictionary mapping inames to other instruction ids that need to be scheduled before the iname should be eligible for scheduling. 
+ + :arg loop_nest_with_map: Dictionary mapping iname1 to a set containing + iname2 iff either iname1 nests around iname2 or iname2 nests around + iname1 + + :arg loop_nest_around_map: Dictionary mapping iname1 to a set containing + iname2 iff iname2 nests around iname1 + """ result = {} from loopy.kernel.data import ConcurrentTag, IlpBaseTag + # For each insn, examine its inames (`iname`) and its dependees' inames + # (`dep_iname`) to determine which instructions must be scheduled before + # entering the iname loop. + # Create result dict, which maps iname to instructions that must be + # scheduled prior to entering iname. + + # For each insn, loop over its non-concurrent inames (`iname`) for insn in kernel.instructions: for iname in kernel.insn_inames(insn): + # (Ignore concurrent inames) if kernel.iname_tags_of_type(iname, ConcurrentTag): continue + # Let iname_dep be the set of ids associated with result[iname] + # (if iname is not already in result, add iname as a key) iname_dep = result.setdefault(iname, set()) - for dep_insn_id in insn.depends_on: + # Loop over instructions on which insn depends (dep_insn) + # and determine whether dep_insn must be schedued before + # iname, in which case add its id to iname_dep (result[iname]) + if kernel.options.use_dependencies_v2: + dependee_ids = cartoon_depends_on_dict.get(insn.id, set()) + else: + dependee_ids = insn.depends_on + + for dep_insn_id in dependee_ids: if dep_insn_id in iname_dep: # already depending, nothing to check continue - dep_insn = kernel.id_to_insn[dep_insn_id] - dep_insn_inames = dep_insn.within_inames + dep_insn = kernel.id_to_insn[dep_insn_id] # Dependee + dep_insn_inames = dep_insn.within_inames # Dependee inames + # Check whether insn's iname is also in dependee inames if iname in dep_insn_inames: - # Nothing to be learned, dependency is in loop over iname + # Nothing to be learned, dependee is inside loop over iname # already. 
continue # To make sure dep_insn belongs outside of iname, we must prove - # that all inames that dep_insn will be executed in nest + # that all inames in which dep_insn will be executed nest # outside of the loop over *iname*. (i.e. nested around, or # before). + # Loop over each of the dependee's inames (dep_insn_iname) may_add_to_loop_dep_map = True for dep_insn_iname in dep_insn_inames: + + # If loop_nest_around_map says dep_insn_iname nests around + # iname, dep_insn_iname is guaranteed to nest outside of + # iname, we're safe, so continue if dep_insn_iname in loop_nest_around_map[iname]: - # dep_insn_iname is guaranteed to nest outside of iname - # -> safe. continue + # If dep_insn_iname is concurrent, continue + # (parallel tags don't really nest, so disregard them here) if kernel.iname_tags_of_type(dep_insn_iname, (ConcurrentTag, IlpBaseTag)): - # Parallel tags don't really nest, so we'll disregard - # them here. continue + # If loop_nest_with_map says dep_insn_iname does not nest + # inside or around iname, it must be nested separately; + # we're safe, so continue if dep_insn_iname not in loop_nest_with_map.get(iname, []): - # dep_insn_iname does not nest with iname, so its nest - # must occur outside. continue + # If none of the three cases above succeeds for any + # dep_insn_iname in dep_insn_inames, we cannot add dep_insn + # to iname's set of insns in result dict. may_add_to_loop_dep_map = False break @@ -317,6 +354,9 @@ def find_loop_insn_dep_map(kernel, loop_nest_with_map, loop_nest_around_map): dep_insn=dep_insn_id, insn=insn.id)) + # If at least one of the three cases above succeeds for every + # dep_insn_iname, we can add dep_insn to iname's set of insns + # in result dict. 
iname_dep.add(dep_insn_id) return result @@ -1985,22 +2025,18 @@ def generate_loop_schedules_inner(kernel, debug_args={}): loop_nest_with_map = find_loop_nest_with_map(kernel) loop_nest_around_map = find_loop_nest_around_map(kernel) - # {{{ create dependency graph with edges from depender* to dependee* - # iff intersection (SAME_map & DEP_map) is not empty + # {{{ create cartoon dependency graph with edge from depender* to + # dependee* iff intersection (SAME_map & DEP_map) is not empty - from loopy.schedule.checker.dependency import ( - filter_deps_by_intersection_with_SAME, - ) - from loopy.schedule.checker.utils import ( - create_graph_from_pairs, - ) + if kernel.options.use_dependencies_v2: + from loopy.schedule.checker.dependency import ( + filter_deps_by_intersection_with_SAME, + ) - # Get dep graph edges with edges from depender->dependee - dep_graph_pairs = filter_deps_by_intersection_with_SAME(kernel) - - # Create dep graph from edges - insn_depends_on_graph = create_graph_from_pairs(dep_graph_pairs) - # TODO create ^this func + # Get dep graph edges with edges FROM depender TO dependee + cartoon_depends_on_dict = filter_deps_by_intersection_with_SAME(kernel) + else: + cartoon_depends_on_dict = None # }}} @@ -2011,8 +2047,9 @@ def generate_loop_schedules_inner(kernel, debug_args={}): kernel, loop_nest_with_map=loop_nest_with_map, loop_nest_around_map=loop_nest_around_map, - insn_depends_on_graph=insn_depends_on_graph), # TODO deal with this - insn_depends_on_graph=insn_depends_on_graph, # TODO deal with this + cartoon_depends_on_dict=cartoon_depends_on_dict, + ), + #insn_depends_on_graph=insn_depends_on_graph, # TODO deal with this breakable_inames=ilp_inames, ilp_inames=ilp_inames, vec_inames=vec_inames, From 926ae65558c5b7543a5c8c9b66d2ed16b8f978bb Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Sat, 24 Apr 2021 03:52:22 -0500 Subject: [PATCH 10/22] test use of cartoon dep graph inside find_loop_insn_dep_map() with new test: 
test_find_loop_insn_dep_map_using_cartoon_dep_graph() --- test/test_linearization_checker.py | 125 ++++++++++++++++++++++++++--- 1 file changed, 116 insertions(+), 9 deletions(-) diff --git a/test/test_linearization_checker.py b/test/test_linearization_checker.py index 383278326..b3b1dc495 100644 --- a/test/test_linearization_checker.py +++ b/test/test_linearization_checker.py @@ -1099,11 +1099,11 @@ def test_sios_and_schedules_with_vec_and_barriers(): # Get a linearization proc_knl = preprocess_kernel(knl) lin_knl = get_one_linearized_kernel(proc_knl) - linearization_items = lin_knl.linearization + lin_items = lin_knl.linearization stmt_id_pairs = [("stmt_1", "stmt_2")] pworders = get_pairwise_statement_orderings( - lin_knl, linearization_items, stmt_id_pairs) + lin_knl, lin_items, stmt_id_pairs) # {{{ Relationship between stmt_1 and stmt_2 @@ -1321,12 +1321,12 @@ def test_sios_with_matmul(): # Get a linearization proc_knl = preprocess_kernel(knl) lin_knl = get_one_linearized_kernel(proc_knl) - linearization_items = lin_knl.linearization + lin_items = lin_knl.linearization # Get ALL statement id pairs from loopy.schedule import RunInstruction all_stmt_ids = [ - lin_item.insn_id for lin_item in linearization_items + lin_item.insn_id for lin_item in lin_items if isinstance(lin_item, RunInstruction)] from itertools import product stmt_id_pairs = [] @@ -1335,7 +1335,7 @@ def test_sios_with_matmul(): # Generate pairwise ordering info for every pair get_pairwise_statement_orderings( - lin_knl, linearization_items, stmt_id_pairs) + lin_knl, lin_items, stmt_id_pairs) # }}} @@ -2262,11 +2262,11 @@ def test_filtering_deps_by_same(): knl = lp.make_kernel( "{[i,j,k,m] : 0 <= i,j,k,m < n}", """ - a[i,j,k,m] = 1 {id=s1} - a[i,j,k,m] = 2 {id=s2} - a[i,j,k,m] = 3 {id=s3} - a[i,j,k,m] = 4 {id=s4} a[i,j,k,m] = 5 {id=s5} + a[i,j,k,m] = 4 {id=s4} + a[i,j,k,m] = 3 {id=s3} + a[i,j,k,m] = 2 {id=s2} + a[i,j,k,m] = 1 {id=s1} """) knl = lp.add_and_infer_dtypes(knl, {"a": np.float32}) knl = 
lp.tag_inames(knl, "m:l.0") @@ -2332,6 +2332,113 @@ def _dep_with_condition(cond): # }}} + +# {{{ test_find_loop_insn_dep_map_using_cartoon_dep_graph + +def test_find_loop_insn_dep_map_using_cartoon_dep_graph(): + # Test use of cartoon dep graph inside find_loop_insn_dep_map(), + # which is called during linearization, and should cause + # linearization process to order the x loops below sequentially + + # Make a kernel + knl = lp.make_kernel( + "{[i,j,k,m,x1,x2,x3,x4,x5] : 0 <= i,j,k,m,x1,x2,x3,x4,x5 < n}", + """ + for i,j,k,m + for x5 + <>t5 = 5 {id=s5} + end + for x3 + <>t3 = 3 {id=s3} + end + for x4 + <>t4 = 4 {id=s4} + end + for x1 + <>t1 = 1 {id=s1} + end + for x2 + <>t2 = 2 {id=s2} + end + end + """) + knl = lp.tag_inames(knl, "m:l.0") + + # Make some deps + + def _dep_with_condition(xloop_after, xloop_before, cond): + sid_after = 0 if xloop_before == xloop_after else 1 + return _isl_map_with_marked_dims( + "[n] -> {{" + "[{0}'=0, i', j', k', m', x{1}'] -> [{0}={3}, i, j, k, m, x{2}] : " + "0 <= i,j,k,m,x{2},i',j',k',m',x{1}' < n and {4}" + "}}".format( + STATEMENT_VAR_NAME, xloop_before, xloop_after, sid_after, cond)) + + # Should NOT create an edge: + dep_s2_on_s1_1 = _dep_with_condition(2, 1, "i'< i and j'<=j and k' =k and m'=m") + # Should create an edge: + dep_s2_on_s1_2 = _dep_with_condition(2, 1, "i'<=i and j'<=j and k' =k and m'=m") + # Should NOT create an edge: + dep_s2_on_s2_1 = _dep_with_condition(2, 2, "i'< i and j'<=j and k' =k and m'=m") + # Should NOT create an edge: + dep_s2_on_s2_2 = _dep_with_condition(2, 2, "i'<=i and j'<=j and k'< k and m'=m") + # Should create an edge: + dep_s3_on_s2_1 = _dep_with_condition(3, 2, "i'<=i and j'<=j and k' =k and m'=m") + # Should create an edge: + dep_s4_on_s3_1 = _dep_with_condition(4, 3, "i'<=i and j'<=j and k' =k and m'=m") + # Should create an edge: + dep_s5_on_s4_1 = _dep_with_condition(5, 4, "i' =i and j' =j and k' =k and m'=m") + + knl = lp.add_dependency_v2(knl, "s2", "s1", dep_s2_on_s1_1) + knl = 
lp.add_dependency_v2(knl, "s2", "s1", dep_s2_on_s1_2) + knl = lp.add_dependency_v2(knl, "s2", "s2", dep_s2_on_s2_1) + knl = lp.add_dependency_v2(knl, "s2", "s2", dep_s2_on_s2_2) + knl = lp.add_dependency_v2(knl, "s3", "s2", dep_s3_on_s2_1) + knl = lp.add_dependency_v2(knl, "s4", "s3", dep_s4_on_s3_1) + knl = lp.add_dependency_v2(knl, "s5", "s4", dep_s5_on_s4_1) + + # Test filtering of deps by intersection with SAME + + from loopy.schedule.checker.dependency import ( + filter_deps_by_intersection_with_SAME, + ) + filtered_depends_on_dict = filter_deps_by_intersection_with_SAME(knl) + + # Make sure filtered edges are correct + + # (m is concurrent so shouldn't matter) + depends_on_dict_expected = { + "s2": set(["s1"]), + "s3": set(["s2"]), + "s4": set(["s3"]), + "s5": set(["s4"]), + } + + assert filtered_depends_on_dict == depends_on_dict_expected + + # Get a linearization + knl = lp.set_options(knl, use_dependencies_v2=True) + proc_knl = preprocess_kernel(knl) + lin_knl = get_one_linearized_kernel(proc_knl) + lin_items = lin_knl.linearization + + # Check stmt order + from loopy.schedule import RunInstruction + stmt_ids_ordered = [ + lin_item.insn_id for lin_item in lin_items + if isinstance(lin_item, RunInstruction)] + + stmt_ids_ordered_expected = ["s1", "s2", "s3", "s4", "s5"] + + assert stmt_ids_ordered == stmt_ids_ordered_expected + + # Check dep satisfaction + unsatisfied_deps = lp.find_unsatisfied_dependencies(proc_knl, lin_items) + assert not unsatisfied_deps + +# }}} + # }}} # }}} From c689a9d43238076d375e12f2a8c180ad9c685ad6 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Sat, 24 Apr 2021 23:00:10 -0500 Subject: [PATCH 11/22] enable usage of cartoon dependency graph for statement ordering decisions during linearization --- loopy/schedule/__init__.py | 34 ++++++++++++++++++++++++++-------- 1 file changed, 26 insertions(+), 8 deletions(-) diff --git a/loopy/schedule/__init__.py b/loopy/schedule/__init__.py index dc3696923..37f44b310 100644 ---
a/loopy/schedule/__init__.py +++ b/loopy/schedule/__init__.py @@ -372,16 +372,24 @@ def group_insn_counts(kernel): return result -def gen_dependencies_except(kernel, insn_id, except_insn_ids): - insn = kernel.id_to_insn[insn_id] - for dep_id in insn.depends_on: +def gen_dependencies_except( + kernel, insn_id, except_insn_ids, cartoon_depends_on_dict): + + # Get dependee IDs + if kernel.options.use_dependencies_v2: + dependee_ids = cartoon_depends_on_dict.get(insn_id, set()) + else: + dependee_ids = kernel.id_to_insn[insn_id].depends_on + + for dep_id in dependee_ids: if dep_id in except_insn_ids: continue yield dep_id - yield from gen_dependencies_except(kernel, dep_id, except_insn_ids) + yield from gen_dependencies_except( + kernel, dep_id, except_insn_ids, cartoon_depends_on_dict) def get_priority_tiers(wanted, priorities): @@ -665,6 +673,8 @@ class SchedulerState(ImmutableRecord): order with instruction priorities as tie breaker. """ + # TODO document cartoon_depends_on_dict + @property def last_entered_loop(self): if self.active_inames: @@ -971,7 +981,13 @@ def insn_sort_key(insn_id): for insn_id in insn_ids_to_try: insn = kernel.id_to_insn[insn_id] - is_ready = insn.depends_on <= sched_state.scheduled_insn_ids + # make sure dependees have been scheduled + if kernel.options.use_dependencies_v2: + dependee_ids = sched_state.cartoon_depends_on_dict.get(insn.id, set()) + else: + dependee_ids = insn.depends_on + + is_ready = dependee_ids <= sched_state.scheduled_insn_ids if not is_ready: continue @@ -1150,8 +1166,10 @@ def insn_sort_key(insn_id): # check if there's a dependency of insn that needs to be # outside of last_entered_loop. 
- for subdep_id in gen_dependencies_except(kernel, insn_id, - sched_state.scheduled_insn_ids): + for subdep_id in gen_dependencies_except( + kernel, insn_id, + sched_state.scheduled_insn_ids, + sched_state.cartoon_depends_on_dict): want = (kernel.insn_inames(subdep_id) - sched_state.parallel_inames) if ( @@ -2049,7 +2067,7 @@ def generate_loop_schedules_inner(kernel, debug_args={}): loop_nest_around_map=loop_nest_around_map, cartoon_depends_on_dict=cartoon_depends_on_dict, ), - #insn_depends_on_graph=insn_depends_on_graph, # TODO deal with this + cartoon_depends_on_dict=cartoon_depends_on_dict, breakable_inames=ilp_inames, ilp_inames=ilp_inames, vec_inames=vec_inames, From ac5dc08e635506a0d4f1f181f1be977b6c0037cb Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Sat, 24 Apr 2021 23:02:50 -0500 Subject: [PATCH 12/22] rename cartoon_depends_on_dict->simplified_depends_on_graph --- loopy/schedule/__init__.py | 27 ++++++++++++++------------- 1 file changed, 14 insertions(+), 13 deletions(-) diff --git a/loopy/schedule/__init__.py b/loopy/schedule/__init__.py index 37f44b310..94176ff12 100644 --- a/loopy/schedule/__init__.py +++ b/loopy/schedule/__init__.py @@ -254,7 +254,7 @@ def find_loop_nest_around_map(kernel): def find_loop_insn_dep_map( kernel, loop_nest_with_map, loop_nest_around_map, - cartoon_depends_on_dict, use_dependencies_v2=False, + simplified_depends_on_graph, use_dependencies_v2=False, ): """Returns a dictionary mapping inames to other instruction ids that need to be scheduled before the iname should be eligible for scheduling. 
@@ -292,7 +292,7 @@ def find_loop_insn_dep_map( # and determine whether dep_insn must be schedued before # iname, in which case add its id to iname_dep (result[iname]) if kernel.options.use_dependencies_v2: - dependee_ids = cartoon_depends_on_dict.get(insn.id, set()) + dependee_ids = simplified_depends_on_graph.get(insn.id, set()) else: dependee_ids = insn.depends_on @@ -373,11 +373,11 @@ def group_insn_counts(kernel): def gen_dependencies_except( - kernel, insn_id, except_insn_ids, cartoon_depends_on_dict): + kernel, insn_id, except_insn_ids, simplified_depends_on_graph): # Get dependee IDs if kernel.options.use_dependencies_v2: - dependee_ids = cartoon_depends_on_dict.get(insn_id, set()) + dependee_ids = simplified_depends_on_graph.get(insn_id, set()) else: dependee_ids = kernel.id_to_insn[insn_id].depends_on @@ -389,7 +389,7 @@ def gen_dependencies_except( yield dep_id yield from gen_dependencies_except( - kernel, dep_id, except_insn_ids, cartoon_depends_on_dict) + kernel, dep_id, except_insn_ids, simplified_depends_on_graph) def get_priority_tiers(wanted, priorities): @@ -673,7 +673,7 @@ class SchedulerState(ImmutableRecord): order with instruction priorities as tie breaker. 
""" - # TODO document cartoon_depends_on_dict + # TODO document simplified_depends_on_graph @property def last_entered_loop(self): @@ -983,7 +983,8 @@ def insn_sort_key(insn_id): # make sure dependees have been scheduled if kernel.options.use_dependencies_v2: - dependee_ids = sched_state.cartoon_depends_on_dict.get(insn.id, set()) + dependee_ids = sched_state.simplified_depends_on_graph.get( + insn.id, set()) else: dependee_ids = insn.depends_on @@ -1169,7 +1170,7 @@ def insn_sort_key(insn_id): for subdep_id in gen_dependencies_except( kernel, insn_id, sched_state.scheduled_insn_ids, - sched_state.cartoon_depends_on_dict): + sched_state.simplified_depends_on_graph): want = (kernel.insn_inames(subdep_id) - sched_state.parallel_inames) if ( @@ -2043,7 +2044,7 @@ def generate_loop_schedules_inner(kernel, debug_args={}): loop_nest_with_map = find_loop_nest_with_map(kernel) loop_nest_around_map = find_loop_nest_around_map(kernel) - # {{{ create cartoon dependency graph with edge from depender* to + # {{{ create simplified dependency graph with edge from depender* to # dependee* iff intersection (SAME_map & DEP_map) is not empty if kernel.options.use_dependencies_v2: @@ -2052,9 +2053,9 @@ def generate_loop_schedules_inner(kernel, debug_args={}): ) # Get dep graph edges with edges FROM depender TO dependee - cartoon_depends_on_dict = filter_deps_by_intersection_with_SAME(kernel) + simplified_depends_on_graph = filter_deps_by_intersection_with_SAME(kernel) else: - cartoon_depends_on_dict = None + simplified_depends_on_graph = None # }}} @@ -2065,9 +2066,9 @@ def generate_loop_schedules_inner(kernel, debug_args={}): kernel, loop_nest_with_map=loop_nest_with_map, loop_nest_around_map=loop_nest_around_map, - cartoon_depends_on_dict=cartoon_depends_on_dict, + simplified_depends_on_graph=simplified_depends_on_graph, ), - cartoon_depends_on_dict=cartoon_depends_on_dict, + simplified_depends_on_graph=simplified_depends_on_graph, breakable_inames=ilp_inames, ilp_inames=ilp_inames, 
vec_inames=vec_inames, From 3d808f1a653aece8425a41762416b5382fdd5329 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Sat, 24 Apr 2021 23:03:52 -0500 Subject: [PATCH 13/22] test usage of cartoon dependency graph for statement ordering decisions during linearization --- test/test_linearization_checker.py | 118 ++++++++++++++++------------- 1 file changed, 64 insertions(+), 54 deletions(-) diff --git a/test/test_linearization_checker.py b/test/test_linearization_checker.py index b3b1dc495..10c38ddfd 100644 --- a/test/test_linearization_checker.py +++ b/test/test_linearization_checker.py @@ -138,6 +138,13 @@ def _check_orderings_for_stmt_pair( maps_to_compare = [(m1, m2) for m1, m2 in map_candidates if m1 is not None] _align_and_compare_maps(maps_to_compare) + +def _get_runinstruction_ids_from_linearization(lin_items): + from loopy.schedule import RunInstruction + return [ + lin_item.insn_id for lin_item in lin_items + if isinstance(lin_item, RunInstruction)] + # }}} @@ -1316,18 +1323,13 @@ def test_sios_with_matmul(): knl, "b", ["j_inner", "k_inner"], default_tag="l.auto") knl = lp.prioritize_loops(knl, "k_outer,k_inner") - proc_knl = preprocess_kernel(knl) - # Get a linearization proc_knl = preprocess_kernel(knl) lin_knl = get_one_linearized_kernel(proc_knl) lin_items = lin_knl.linearization # Get ALL statement id pairs - from loopy.schedule import RunInstruction - all_stmt_ids = [ - lin_item.insn_id for lin_item in lin_items - if isinstance(lin_item, RunInstruction)] + all_stmt_ids = _get_runinstruction_ids_from_linearization(lin_items) from itertools import product stmt_id_pairs = [] for idx, sid in enumerate(all_stmt_ids): @@ -2273,28 +2275,30 @@ def test_filtering_deps_by_same(): # Make some deps - def _dep_with_condition(cond): + def _dep_with_condition(stmt_before, stmt_after, cond): + sid_after = 0 if stmt_before == stmt_after else 1 return _isl_map_with_marked_dims( "[n] -> {{" - "[{0}'=0, i', j', k', m'] -> [{0}=0, i, j, k, m] : " - "0 <= i,j,k,m,i',j',k',m' < 
n and {1}" - "}}".format(STATEMENT_VAR_NAME, cond)) + "[{0}'=0, i', j', k', m'] -> [{0}={1}, i, j, k, m] : " + "0 <= i,j,k,m,i',j',k',m' < n and {2}" + "}}".format( + STATEMENT_VAR_NAME, sid_after, cond)) - dep_s2_on_s1_1 = _dep_with_condition("i' < i and j' <= j and k' = k and m' < m") - dep_s2_on_s1_2 = _dep_with_condition("i' <= i and j' <= j and k' = k and m' < m") + dep_s2_on_s1_1 = _dep_with_condition(2, 1, "i'< i and j'<=j and k'=k and m't5 = 5 {id=s5} - end - for x3 - <>t3 = 3 {id=s3} - end - for x4 - <>t4 = 4 {id=s4} - end - for x1 - <>t1 = 1 {id=s1} - end - for x2 - <>t2 = 2 {id=s2} - end + <>t5 = 5 {id=s5} + <>t3 = 3 {id=s3} + <>t4 = 4 {id=s4} + <>t1 = 1 {id=s1} + <>t2 = 2 {id=s2} end """) knl = lp.tag_inames(knl, "m:l.0") # Make some deps - def _dep_with_condition(xloop_after, xloop_before, cond): - sid_after = 0 if xloop_before == xloop_after else 1 + def _dep_with_condition(stmt_before, stmt_after, cond): + sid_after = 0 if stmt_before == stmt_after else 1 return _isl_map_with_marked_dims( "[n] -> {{" - "[{0}'=0, i', j', k', m', x{1}'] -> [{0}={3}, i, j, k, m, x{2}] : " - "0 <= i,j,k,m,x{2},i',j',k',m',x{1}' < n and {4}" + "[{0}'=0, i', j', k', m'] -> [{0}={1}, i, j, k, m] : " + "0 <= i,j,k,m,i',j',k',m' < n and {2}" "}}".format( - STATEMENT_VAR_NAME, xloop_before, xloop_after, sid_after, cond)) + STATEMENT_VAR_NAME, sid_after, cond)) # Should NOT create an edge: dep_s2_on_s1_1 = _dep_with_condition(2, 1, "i'< i and j'<=j and k' =k and m'=m") @@ -2417,26 +2412,41 @@ def _dep_with_condition(xloop_after, xloop_before, cond): assert filtered_depends_on_dict == depends_on_dict_expected - # Get a linearization - knl = lp.set_options(knl, use_dependencies_v2=True) + stmt_ids_ordered_desired = ["s1", "s2", "s3", "s4", "s5"] + + # {{{ Get a linearization WITHOUT using the simplified dep graph + + knl = lp.set_options(knl, use_dependencies_v2=False) proc_knl = preprocess_kernel(knl) lin_knl = get_one_linearized_kernel(proc_knl) lin_items = 
lin_knl.linearization # Check stmt order - from loopy.schedule import RunInstruction - stmt_ids_ordered = [ - lin_item.insn_id for lin_item in lin_items - if isinstance(lin_item, RunInstruction)] + stmt_ids_ordered = _get_runinstruction_ids_from_linearization(lin_items) + assert stmt_ids_ordered != stmt_ids_ordered_desired + + # Check dep satisfaction + unsatisfied_deps = lp.find_unsatisfied_dependencies(proc_knl, lin_items) + assert unsatisfied_deps - stmt_ids_ordered_expected = ["s1", "s2", "s3", "s4", "s5"] + # }}} - assert stmt_ids_ordered == stmt_ids_ordered_expected + # {{{ Get a linearization using the simplified dep graph + knl = lp.set_options(knl, use_dependencies_v2=True) + proc_knl = preprocess_kernel(knl) + lin_knl = get_one_linearized_kernel(proc_knl) + lin_items = lin_knl.linearization + + # Check stmt order + stmt_ids_ordered = _get_runinstruction_ids_from_linearization(lin_items) + assert stmt_ids_ordered == stmt_ids_ordered_desired # Check dep satisfaction unsatisfied_deps = lp.find_unsatisfied_dependencies(proc_knl, lin_items) assert not unsatisfied_deps + # }}} + # }}} # }}} From 1d12fc05038491e0de3dae63d6207512701108a6 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Sat, 24 Apr 2021 23:42:07 -0500 Subject: [PATCH 14/22] reduce duplicated code in tests by using _process_and_linearize(knl) function (created in ancestor branch) --- test/test_linearization_checker.py | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/test/test_linearization_checker.py b/test/test_linearization_checker.py index 43aa804f7..50eab91cc 100644 --- a/test/test_linearization_checker.py +++ b/test/test_linearization_checker.py @@ -2348,7 +2348,9 @@ def test_linearization_using_simplified_dep_graph(): """) knl = lp.tag_inames(knl, "m:l.0") - # Make some deps + stmt_ids_ordered_desired = ["s1", "s2", "s3", "s4", "s5"] + + # {{{ Add some deps def _dep_with_condition(stmt_before, stmt_after, cond): sid_after = 0 if stmt_before == 
stmt_after else 1 @@ -2382,7 +2384,9 @@ def _dep_with_condition(stmt_before, stmt_after, cond): knl = lp.add_dependency_v2(knl, "s4", "s3", dep_s4_on_s3_1) knl = lp.add_dependency_v2(knl, "s5", "s4", dep_s5_on_s4_1) - # Test filteringn of deps by intersection with SAME + # }}} + + # {{{ Test filtering of deps by intersection with SAME from loopy.schedule.checker.dependency import ( filter_deps_by_intersection_with_SAME, @@ -2401,30 +2405,27 @@ def _dep_with_condition(stmt_before, stmt_after, cond): assert filtered_depends_on_dict == depends_on_dict_expected - stmt_ids_ordered_desired = ["s1", "s2", "s3", "s4", "s5"] + # }}} # {{{ Get a linearization WITHOUT using the simplified dep graph knl = lp.set_options(knl, use_dependencies_v2=False) - proc_knl = preprocess_kernel(knl) - lin_knl = get_one_linearized_kernel(proc_knl) - lin_items = lin_knl.linearization + lin_items, proc_knl, lin_knl = _process_and_linearize(knl) - # Check stmt order + # Check stmt order (should be wrong) stmt_ids_ordered = _get_runinstruction_ids_from_linearization(lin_items) assert stmt_ids_ordered != stmt_ids_ordered_desired - # Check dep satisfaction + # Check dep satisfaction (should not all be satisfied) unsatisfied_deps = lp.find_unsatisfied_dependencies(proc_knl, lin_items) assert unsatisfied_deps # }}} # {{{ Get a linearization using the simplified dep graph + knl = lp.set_options(knl, use_dependencies_v2=True) - proc_knl = preprocess_kernel(knl) - lin_knl = get_one_linearized_kernel(proc_knl) - lin_items = lin_knl.linearization + lin_items, proc_knl, lin_knl = _process_and_linearize(knl) # Check stmt order stmt_ids_ordered = _get_runinstruction_ids_from_linearization(lin_items) assert stmt_ids_ordered == stmt_ids_ordered_desired # Check dep satisfaction unsatisfied_deps = lp.find_unsatisfied_dependencies(proc_knl, lin_items) assert not unsatisfied_deps From ffd0610b2d4366086e3bdfc9ab0f00f6554300de Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Sun, 25 Apr 2021 00:20:03 -0500 Subject: [PATCH 15/22] in every remaining point during linearization where dependencies are used, check kernel option use_dependencies_v2 to determine whether to use the new simplified
(cartoon) dep graph instead of insn.depends_on --- loopy/schedule/__init__.py | 41 +++++++++++++++++++++++++++++--------- 1 file changed, 32 insertions(+), 9 deletions(-) diff --git a/loopy/schedule/__init__.py b/loopy/schedule/__init__.py index 94176ff12..fc084fd68 100644 --- a/loopy/schedule/__init__.py +++ b/loopy/schedule/__init__.py @@ -254,8 +254,7 @@ def find_loop_nest_around_map(kernel): def find_loop_insn_dep_map( kernel, loop_nest_with_map, loop_nest_around_map, - simplified_depends_on_graph, use_dependencies_v2=False, - ): + simplified_depends_on_graph): """Returns a dictionary mapping inames to other instruction ids that need to be scheduled before the iname should be eligible for scheduling. @@ -266,6 +265,13 @@ def find_loop_insn_dep_map( :arg loop_nest_around_map: Dictionary mapping iname1 to a set containing iname2 iff iname2 nests around iname1 + :arg simplified_depends_on_graph: Dictionary mapping depender statement IDs + to sets of dependee statement IDs, as produced by + `loopy.schedule.checker.dependency.filter_deps_by_intersection_with_SAME`, + which will be used to acquire dependee statement ids if + `kernel.options.use_dependencies_v2` is 'True' (otherwise old + dependencies in insn.depends_on will be used).
+ """ result = {} @@ -685,12 +691,20 @@ def last_entered_loop(self): # }}} -def get_insns_in_topologically_sorted_order(kernel): +def get_insns_in_topologically_sorted_order( + kernel, simplified_depends_on_graph): from pytools.graph import compute_topological_order rev_dep_map = {insn.id: set() for insn in kernel.instructions} for insn in kernel.instructions: - for dep in insn.depends_on: + + if kernel.options.use_dependencies_v2: + dependee_ids = simplified_depends_on_graph.get( + insn.id, set()) + else: + dependee_ids = insn.depends_on + + for dep in dependee_ids: rev_dep_map[dep].add(insn.id) # For breaking ties, we compare the features of an intruction @@ -724,7 +738,8 @@ def key(insn_id): # {{{ schedule_as_many_run_insns_as_possible -def schedule_as_many_run_insns_as_possible(sched_state, template_insn): +def schedule_as_many_run_insns_as_possible( + sched_state, template_insn, use_dependencies_v2): """ Returns an instance of :class:`loopy.schedule.SchedulerState`, by appending all reachable instructions that are similar to *template_insn*. 
We define @@ -792,7 +807,14 @@ def is_similar_to_template(insn): if is_similar_to_template(insn): # check reachability - if not (insn.depends_on & ignored_unscheduled_insn_ids): + + if use_dependencies_v2: + dependee_ids = sched_state.simplified_depends_on_graph.get( + insn.id, set()) + else: + dependee_ids = insn.depends_on + + if not (dependee_ids & ignored_unscheduled_insn_ids): if insn.id in sched_state.prescheduled_insn_ids: if next_preschedule_insn_id() == insn.id: preschedule.pop(0) @@ -1119,8 +1141,8 @@ def insn_sort_key(insn_id): insns_in_topologically_sorted_order=new_toposorted_insns, ) - new_sched_state = schedule_as_many_run_insns_as_possible(new_sched_state, - insn) + new_sched_state = schedule_as_many_run_insns_as_possible( + new_sched_state, insn, kernel.options.use_dependencies_v2) # Don't be eager about entering/leaving loops--if progress has been # made, revert to top of scheduler and see if more progress can be @@ -2098,7 +2120,8 @@ def generate_loop_schedules_inner(kernel, debug_args={}): active_group_counts={}, insns_in_topologically_sorted_order=( - get_insns_in_topologically_sorted_order(kernel)), + get_insns_in_topologically_sorted_order( + kernel, simplified_depends_on_graph)), ) schedule_gen_kwargs = {} From bc23d84419919227f58d01b21caf48a387402aed Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Sun, 25 Apr 2021 00:20:58 -0500 Subject: [PATCH 16/22] update comment --- test/test_linearization_checker.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/test/test_linearization_checker.py b/test/test_linearization_checker.py index 50eab91cc..8f83577e0 100644 --- a/test/test_linearization_checker.py +++ b/test/test_linearization_checker.py @@ -2329,8 +2329,7 @@ def _dep_with_condition(stmt_before, stmt_after, cond): # {{{ test_linearization_using_simplified_dep_graph def test_linearization_using_simplified_dep_graph(): - # Test use of simplified dep graph inside find_loop_insn_dep_map(), - # which is called during 
linearization. + # Test use of simplified dep graph during linearization. # The deps created below should yield a simplified dep graph that causes the # linearization process to order assignments below in numerical order From 46580b54f395ed9bdc1123b850265e62f8b970a5 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Sun, 16 May 2021 20:42:01 -0500 Subject: [PATCH 17/22] in map_domain, ignore statement var name in maps when performing iname overlap check --- loopy/transform/iname.py | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/loopy/transform/iname.py b/loopy/transform/iname.py index 509b820eb..565c71cf7 100644 --- a/loopy/transform/iname.py +++ b/loopy/transform/iname.py @@ -2077,12 +2077,25 @@ def map_domain(kernel, isl_map, within=None, rename_after={}): # }}} + from loopy.schedule.checker.schedule import ( + BEFORE_MARK, + STATEMENT_VAR_NAME, + ) + def _check_overlap_condition_for_domain(s, transform_map_in_names): + + names_to_ignore = set([STATEMENT_VAR_NAME, STATEMENT_VAR_NAME+BEFORE_MARK]) + transform_map_in_inames = transform_map_in_names - names_to_ignore + var_dict = s.get_var_dict() - overlap = transform_map_in_names & frozenset(var_dict) + overlap = transform_map_in_inames & frozenset(var_dict) - if overlap and len(overlap) != len(transform_map_in_names): + # If there is any overlap in the inames in the transform map and s + # (note that we're ignoring the statement var name, which may have been + # added to a transform map or s), all of the transform map inames must be in + # the overlap. + if overlap and len(overlap) != len(transform_map_in_inames): raise LoopyError("loop domain '%s' involves a part " "of the map domain inames. 
Domains must " "either involve all or none of the map domain " @@ -2198,10 +2211,6 @@ def process_set(s): insert_and_name_isl_dims, add_eq_isl_constraint_from_names, ) - from loopy.schedule.checker.schedule import ( - BEFORE_MARK, - STATEMENT_VAR_NAME, - ) dt = isl.dim_type # Create version of transform map with before marks From bde5faffce5e78b5ecf478a8f469666fa4cdc527 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Thu, 10 Jun 2021 17:20:09 -0500 Subject: [PATCH 18/22] (temporary fix) if using v2-deps for linearization, don't error in append_barrier_or_raise_error() --- loopy/schedule/__init__.py | 37 +++++++++++++++++++++++-------------- 1 file changed, 23 insertions(+), 14 deletions(-) diff --git a/loopy/schedule/__init__.py b/loopy/schedule/__init__.py index 7009d182a..56bd0745a 100644 --- a/loopy/schedule/__init__.py +++ b/loopy/schedule/__init__.py @@ -1829,20 +1829,28 @@ def _insn_ids_reaching_end(schedule, kind, reverse): return insn_ids_alive_at_scope[-1] -def append_barrier_or_raise_error(schedule, dep, verify_only): +def append_barrier_or_raise_error( + schedule, dep, verify_only, use_dependencies_v2=False): if verify_only: - from loopy.diagnostic import MissingBarrierError - raise MissingBarrierError( - "Dependency '%s' (for variable '%s') " - "requires synchronization " - "by a %s barrier (add a 'no_sync_with' " - "instruction option to state that no " - "synchronization is needed)" - % ( - dep.dep_descr.format( - tgt=dep.target.id, src=dep.source.id), - dep.variable, - dep.var_kind)) + err_str = ( + "Dependency '%s' (for variable '%s') " + "requires synchronization " + "by a %s barrier (add a 'no_sync_with' " + "instruction option to state that no " + "synchronization is needed)" + % ( + dep.dep_descr.format( + tgt=dep.target.id, src=dep.source.id), + dep.variable, + dep.var_kind)) + # TODO need to update all this with v2 deps. For now, make this a warning. 
+ # Do full fix for this later + if use_dependencies_v2: + from warnings import warn + warn(err_str) + else: + from loopy.diagnostic import MissingBarrierError + raise MissingBarrierError(err_str) else: comment = "for {} ({})".format( dep.variable, dep.dep_descr.format( @@ -1909,7 +1917,8 @@ def insert_barriers_at_outer_level(schedule, reverse=False): for dep in chain.from_iterable( dep_tracker.gen_dependencies_with_target_at(insn) for insn in loop_head): - append_barrier_or_raise_error(result, dep, verify_only) + append_barrier_or_raise_error( + result, dep, verify_only, kernel.options.use_dependencies_v2) # This barrier gets inserted outside the loop, hence it is # executed unconditionally and so kills all sources before # the loop. From de7225d1b6b693554e9b6a8a64588f61d7ed05d0 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Thu, 10 Jun 2021 18:11:04 -0500 Subject: [PATCH 19/22] (temporary fix) if using v2-deps for linearization, don't error in append_barrier_or_raise_error() --- loopy/schedule/__init__.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/loopy/schedule/__init__.py b/loopy/schedule/__init__.py index 56bd0745a..3f28a4b3a 100644 --- a/loopy/schedule/__init__.py +++ b/loopy/schedule/__init__.py @@ -1950,7 +1950,8 @@ def insert_barriers_at_outer_level(schedule, reverse=False): elif isinstance(sched_item, RunInstruction): for dep in dep_tracker.gen_dependencies_with_target_at( sched_item.insn_id): - append_barrier_or_raise_error(result, dep, verify_only) + append_barrier_or_raise_error( + result, dep, verify_only, kernel.options.use_dependencies_v2) dep_tracker.discard_all_sources() break result.append(sched_item) From 236d24a9088bbff61481b7d9a99cc83f7ff99eaa Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Tue, 15 Jun 2021 03:17:53 -0500 Subject: [PATCH 20/22] (temporarily) add non_linearizing_deps attribute to instruction (set of dependee ids to be ignored when creating cartoon dag) --- loopy/kernel/instruction.py | 23
+++++++++++++++++++++-- 1 file changed, 21 insertions(+), 2 deletions(-) diff --git a/loopy/kernel/instruction.py b/loopy/kernel/instruction.py index 6ce257d31..e3d76bd1f 100644 --- a/loopy/kernel/instruction.py +++ b/loopy/kernel/instruction.py @@ -222,6 +222,7 @@ class InstructionBase(ImmutableRecord, Taggable): def __init__(self, id, depends_on, depends_on_is_final, dependencies, + non_linearizing_deps, groups, conflicts_with_groups, no_sync_with, within_inames_is_final, within_inames, @@ -253,6 +254,9 @@ def __init__(self, id, depends_on, depends_on_is_final, if dependencies is None: dependencies = {} + # TODO dependee ids for deps that don't affect cartoon dag + if non_linearizing_deps is None: + non_linearizing_deps = set() if groups is None: groups = frozenset() @@ -311,6 +315,7 @@ def __init__(self, id, depends_on, depends_on_is_final, depends_on=depends_on, depends_on_is_final=depends_on_is_final, dependencies=dependencies, + non_linearizing_deps=non_linearizing_deps, # TODO no_sync_with=no_sync_with, groups=groups, conflicts_with_groups=conflicts_with_groups, within_inames_is_final=within_inames_is_final, @@ -405,6 +410,9 @@ def get_str_options(self): result.append("dep="+":".join(self.depends_on)) if self.dependencies: result.append("dependencies="+":".join(self.dependencies.keys())) + if self.non_linearizing_deps: + result.append( + "non_linearizing_deps="+":".join(self.non_linearizing_deps)) if self.no_sync_with: result.append("nosync="+":".join( "%s@%s" % entry for entry in self.no_sync_with)) @@ -475,6 +483,7 @@ def __setstate__(self, val): self.id = intern(self.id) self.depends_on = intern_frozenset_of_ids(self.depends_on) # TODO something with dependencies? + # TODO something with non_linearizing_deps? 
self.groups = intern_frozenset_of_ids(self.groups) self.conflicts_with_groups = ( intern_frozenset_of_ids(self.conflicts_with_groups)) @@ -883,6 +892,7 @@ def __init__(self, depends_on=None, depends_on_is_final=None, dependencies=None, + non_linearizing_deps=None, # TODO groups=None, conflicts_with_groups=None, no_sync_with=None, @@ -897,6 +907,7 @@ def __init__(self, depends_on=depends_on, depends_on_is_final=depends_on_is_final, dependencies=dependencies, + non_linearizing_deps=non_linearizing_deps, # TODO groups=groups, conflicts_with_groups=conflicts_with_groups, no_sync_with=no_sync_with, @@ -1033,6 +1044,7 @@ def __init__(self, depends_on=None, depends_on_is_final=None, dependencies=None, + non_linearizing_deps=None, # TODO groups=None, conflicts_with_groups=None, no_sync_with=None, @@ -1047,6 +1059,7 @@ def __init__(self, depends_on=depends_on, depends_on_is_final=depends_on_is_final, dependencies=dependencies, + non_linearizing_deps=non_linearizing_deps, # TODO groups=groups, conflicts_with_groups=conflicts_with_groups, no_sync_with=no_sync_with, @@ -1234,6 +1247,7 @@ def __init__(self, depends_on=None, depends_on_is_final=None, dependencies=None, + non_linearizing_deps=None, # TODO groups=None, conflicts_with_groups=None, no_sync_with=None, @@ -1257,6 +1271,7 @@ def __init__(self, depends_on=depends_on, depends_on_is_final=depends_on_is_final, dependencies=dependencies, + non_linearizing_deps=non_linearizing_deps, # TODO groups=groups, conflicts_with_groups=conflicts_with_groups, no_sync_with=no_sync_with, within_inames_is_final=within_inames_is_final, @@ -1402,7 +1417,8 @@ def __init__( id=None, depends_on=None, depends_on_is_final=None, - dependencies=None, + dependencies=None, # TODO + non_linearizing_deps=None, groups=None, conflicts_with_groups=None, no_sync_with=None, @@ -1416,6 +1432,7 @@ def __init__( depends_on=depends_on, depends_on_is_final=depends_on_is_final, dependencies=dependencies, + non_linearizing_deps=non_linearizing_deps, # TODO 
groups=groups, conflicts_with_groups=conflicts_with_groups, no_sync_with=no_sync_with, @@ -1470,7 +1487,8 @@ def __init__( id, depends_on=None, depends_on_is_final=None, - dependencies=None, + dependencies=None, # TODO + non_linearizing_deps=None, groups=None, conflicts_with_groups=None, no_sync_with=None, @@ -1490,6 +1508,7 @@ def __init__( depends_on=depends_on, depends_on_is_final=depends_on_is_final, dependencies=dependencies, + non_linearizing_deps=non_linearizing_deps, # TODO groups=groups, conflicts_with_groups=conflicts_with_groups, no_sync_with=no_sync_with, From 2fe181f151aa6ad8394e1a585876d31564115658 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Tue, 15 Jun 2021 03:19:23 -0500 Subject: [PATCH 21/22] when intersecting with SAME, ignore dependees in stmt.non_linearizing_deps --- loopy/schedule/checker/dependency.py | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/loopy/schedule/checker/dependency.py b/loopy/schedule/checker/dependency.py index 97b5321bd..47199a243 100644 --- a/loopy/schedule/checker/dependency.py +++ b/loopy/schedule/checker/dependency.py @@ -87,9 +87,18 @@ def filter_deps_by_intersection_with_SAME(knl): for dependee_id, dep_maps in stmt.dependencies.items(): + # Continue if we've been told to ignore this dependee + if stmt.non_linearizing_deps is None: + dependees_to_ignore = set() + else: + dependees_to_ignore = stmt.non_linearizing_deps + if dependee_id in dependees_to_ignore: + # TODO better fix for this...? 
+ continue + # Continue if we already have this pair - if dependee_id in deps_filtered.keys() and ( - depender_id in deps_filtered[dependee_id]): + if depender_id in deps_filtered.keys() and ( + dependee_id in deps_filtered[depender_id]): continue for dep_map in dep_maps: From 47ab67a8fb68fe5ec3cbed5f2a1f33307569f533 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Mon, 26 Jul 2021 18:25:39 -0500 Subject: [PATCH 22/22] pass kernel['loopy_kernel'] to filter_deps_by_intersection_with_SAME() --- test/test_linearization_checker.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/test/test_linearization_checker.py b/test/test_linearization_checker.py index 08f12b67a..8d96e09a0 100644 --- a/test/test_linearization_checker.py +++ b/test/test_linearization_checker.py @@ -2893,7 +2893,8 @@ def _dep_with_condition(stmt_before, stmt_after, cond): from loopy.schedule.checker.dependency import ( filter_deps_by_intersection_with_SAME, ) - filtered_depends_on_dict = filter_deps_by_intersection_with_SAME(knl) + filtered_depends_on_dict = filter_deps_by_intersection_with_SAME( + knl["loopy_kernel"]) # Make sure filtered edges are correct @@ -2973,7 +2974,8 @@ def _dep_with_condition(stmt_before, stmt_after, cond): from loopy.schedule.checker.dependency import ( filter_deps_by_intersection_with_SAME, ) - filtered_depends_on_dict = filter_deps_by_intersection_with_SAME(knl) + filtered_depends_on_dict = filter_deps_by_intersection_with_SAME( + knl["loopy_kernel"]) # Make sure filtered edges are correct