diff --git a/loopy/kernel/instruction.py b/loopy/kernel/instruction.py index c19966c7f..58e06c2b9 100644 --- a/loopy/kernel/instruction.py +++ b/loopy/kernel/instruction.py @@ -222,6 +222,7 @@ class InstructionBase(ImmutableRecord, Taggable): def __init__(self, id, depends_on, depends_on_is_final, dependencies, + non_linearizing_deps, groups, conflicts_with_groups, no_sync_with, within_inames_is_final, within_inames, @@ -253,6 +254,9 @@ def __init__(self, id, depends_on, depends_on_is_final, if dependencies is None: dependencies = {} + # TODO dependee ids for deps that don't affect cartoon dag + if non_linearizing_deps is None: + non_linearizing_deps = set() if groups is None: groups = frozenset() @@ -311,6 +315,7 @@ def __init__(self, id, depends_on, depends_on_is_final, depends_on=depends_on, depends_on_is_final=depends_on_is_final, dependencies=dependencies, + non_linearizing_deps=non_linearizing_deps, # TODO no_sync_with=no_sync_with, groups=groups, conflicts_with_groups=conflicts_with_groups, within_inames_is_final=within_inames_is_final, @@ -408,6 +413,9 @@ def get_str_options(self): result.append("dep="+":".join(self.depends_on)) if self.dependencies: result.append("dependencies="+":".join(self.dependencies.keys())) + if self.non_linearizing_deps: + result.append( + "non_linearizing_deps="+":".join(self.non_linearizing_deps)) if self.no_sync_with: result.append("nosync="+":".join( "%s@%s" % entry for entry in self.no_sync_with)) @@ -478,6 +486,7 @@ def __setstate__(self, val): self.id = intern(self.id) self.depends_on = intern_frozenset_of_ids(self.depends_on) # TODO something with dependencies? + # TODO something with non_linearizing_deps? 
self.groups = intern_frozenset_of_ids(self.groups) self.conflicts_with_groups = ( intern_frozenset_of_ids(self.conflicts_with_groups)) @@ -905,6 +914,7 @@ def __init__(self, depends_on=None, depends_on_is_final=None, dependencies=None, + non_linearizing_deps=None, # TODO groups=None, conflicts_with_groups=None, no_sync_with=None, @@ -922,6 +932,7 @@ def __init__(self, depends_on=depends_on, depends_on_is_final=depends_on_is_final, dependencies=dependencies, + non_linearizing_deps=non_linearizing_deps, # TODO groups=groups, conflicts_with_groups=conflicts_with_groups, no_sync_with=no_sync_with, @@ -1058,6 +1069,7 @@ def __init__(self, depends_on=None, depends_on_is_final=None, dependencies=None, + non_linearizing_deps=None, # TODO groups=None, conflicts_with_groups=None, no_sync_with=None, @@ -1072,6 +1084,7 @@ def __init__(self, depends_on=depends_on, depends_on_is_final=depends_on_is_final, dependencies=dependencies, + non_linearizing_deps=non_linearizing_deps, # TODO groups=groups, conflicts_with_groups=conflicts_with_groups, no_sync_with=no_sync_with, @@ -1356,6 +1369,7 @@ def __init__(self, depends_on=None, depends_on_is_final=None, dependencies=None, + non_linearizing_deps=None, # TODO groups=None, conflicts_with_groups=None, no_sync_with=None, @@ -1379,6 +1393,7 @@ def __init__(self, depends_on=depends_on, depends_on_is_final=depends_on_is_final, dependencies=dependencies, + non_linearizing_deps=non_linearizing_deps, # TODO groups=groups, conflicts_with_groups=conflicts_with_groups, no_sync_with=no_sync_with, within_inames_is_final=within_inames_is_final, @@ -1530,7 +1545,8 @@ def __init__( id=None, depends_on=None, depends_on_is_final=None, - dependencies=None, + dependencies=None, # TODO + non_linearizing_deps=None, groups=None, conflicts_with_groups=None, no_sync_with=None, @@ -1544,6 +1560,7 @@ def __init__( depends_on=depends_on, depends_on_is_final=depends_on_is_final, dependencies=dependencies, + non_linearizing_deps=non_linearizing_deps, # TODO 
groups=groups, conflicts_with_groups=conflicts_with_groups, no_sync_with=no_sync_with, @@ -1598,7 +1615,8 @@ def __init__( id, depends_on=None, depends_on_is_final=None, - dependencies=None, + dependencies=None, # TODO + non_linearizing_deps=None, groups=None, conflicts_with_groups=None, no_sync_with=None, @@ -1618,6 +1636,7 @@ def __init__( depends_on=depends_on, depends_on_is_final=depends_on_is_final, dependencies=dependencies, + non_linearizing_deps=non_linearizing_deps, # TODO groups=groups, conflicts_with_groups=conflicts_with_groups, no_sync_with=no_sync_with, diff --git a/loopy/options.py b/loopy/options.py index 9f12814b0..da242e648 100644 --- a/loopy/options.py +++ b/loopy/options.py @@ -242,6 +242,7 @@ def __init__( disable_global_barriers=kwargs.get("disable_global_barriers", False), check_dep_resolution=kwargs.get("check_dep_resolution", True), + use_dependencies_v2=kwargs.get("use_dependencies_v2", False), enforce_variable_access_ordered=kwargs.get( "enforce_variable_access_ordered", True), diff --git a/loopy/schedule/__init__.py b/loopy/schedule/__init__.py index 5822f44ed..4ded4e330 100644 --- a/loopy/schedule/__init__.py +++ b/loopy/schedule/__init__.py @@ -253,57 +253,100 @@ def find_loop_nest_around_map(kernel): return result -def find_loop_insn_dep_map(kernel, loop_nest_with_map, loop_nest_around_map): +def find_loop_insn_dep_map( + kernel, loop_nest_with_map, loop_nest_around_map, + simplified_depends_on_graph): """Returns a dictionary mapping inames to other instruction ids that need to be scheduled before the iname should be eligible for scheduling. 
+ + :arg loop_nest_with_map: Dictionary mapping iname1 to a set containing + iname2 iff either iname1 nests around iname2 or iname2 nests around + iname1 + + :arg loop_nest_around_map: Dictionary mapping iname1 to a set containing + iname2 iff iname2 nests around iname1 + + :arg simplified_depends_on_graph: Dictionary mapping depender statement IDs + to sets of dependee statement IDs, as produced by + `loopy.schedule.checker.dependency.filter_deps_by_intersection_with_SAME`, + which will be used to acquire dependee statement ids if + `kernel.options.use_dependencies_v2` is 'True' (otherwise old + dependencies in insn.depends_on will be used). + + """ result = {} from loopy.kernel.data import ConcurrentTag, IlpBaseTag + # For each insn, examine its inames (`iname`) and its dependees' inames + # (`dep_insn_iname`) to determine which instructions must be scheduled before + # entering the iname loop. + # Create result dict, which maps iname to instructions that must be + # scheduled prior to entering iname. 
+ + # For each insn, loop over its non-concurrent inames (`iname`) for insn in kernel.instructions: for iname in kernel.insn_inames(insn): + # (Ignore concurrent inames) if kernel.iname_tags_of_type(iname, ConcurrentTag): continue + # Let iname_dep be the set of ids associated with result[iname] + # (if iname is not already in result, add iname as a key) iname_dep = result.setdefault(iname, set()) - for dep_insn_id in insn.depends_on: + # Loop over instructions on which insn depends (dep_insn) + # and determine whether dep_insn must be scheduled before + # iname, in which case add its id to iname_dep (result[iname]) + if kernel.options.use_dependencies_v2: + dependee_ids = simplified_depends_on_graph.get(insn.id, set()) + else: + dependee_ids = insn.depends_on + + for dep_insn_id in dependee_ids: if dep_insn_id in iname_dep: # already depending, nothing to check continue - dep_insn = kernel.id_to_insn[dep_insn_id] - dep_insn_inames = dep_insn.within_inames + dep_insn = kernel.id_to_insn[dep_insn_id] # Dependee + dep_insn_inames = dep_insn.within_inames # Dependee inames + # Check whether insn's iname is also in dependee inames if iname in dep_insn_inames: - # Nothing to be learned, dependency is in loop over iname + # Nothing to be learned, dependee is inside loop over iname # already. continue # To make sure dep_insn belongs outside of iname, we must prove - # that all inames that dep_insn will be executed in nest + # that all inames in which dep_insn will be executed nest # outside of the loop over *iname*. (i.e. nested around, or # before). + # Loop over each of the dependee's inames (dep_insn_iname) may_add_to_loop_dep_map = True for dep_insn_iname in dep_insn_inames: + + # If loop_nest_around_map says dep_insn_iname nests around + # iname, dep_insn_iname is guaranteed to nest outside of + # iname, we're safe, so continue if dep_insn_iname in loop_nest_around_map[iname]: - # dep_insn_iname is guaranteed to nest outside of iname - # -> safe. 
continue + # If dep_insn_iname is concurrent, continue + # (parallel tags don't really nest, so disregard them here) if kernel.iname_tags_of_type(dep_insn_iname, (ConcurrentTag, IlpBaseTag)): - # Parallel tags don't really nest, so we'll disregard - # them here. continue + # If loop_nest_with_map says dep_insn_iname does not nest + # inside or around iname, it must be nested separately; + # we're safe, so continue if dep_insn_iname not in loop_nest_with_map.get(iname, []): - # dep_insn_iname does not nest with iname, so its nest - # must occur outside. continue + # If none of the three cases above succeeds for any + # dep_insn_iname in dep_insn_inames, we cannot add dep_insn + # to iname's set of insns in result dict. may_add_to_loop_dep_map = False break @@ -318,6 +361,9 @@ def find_loop_insn_dep_map(kernel, loop_nest_with_map, loop_nest_around_map): dep_insn=dep_insn_id, insn=insn.id)) + # If at least one of the three cases above succeeds for every + # dep_insn_iname, we can add dep_insn to iname's set of insns + # in result dict. iname_dep.add(dep_insn_id) return result @@ -333,16 +379,24 @@ def group_insn_counts(kernel): return result -def gen_dependencies_except(kernel, insn_id, except_insn_ids): - insn = kernel.id_to_insn[insn_id] - for dep_id in insn.depends_on: +def gen_dependencies_except( + kernel, insn_id, except_insn_ids, simplified_depends_on_graph): + + # Get dependee IDs + if kernel.options.use_dependencies_v2: + dependee_ids = simplified_depends_on_graph.get(insn_id, set()) + else: + dependee_ids = kernel.id_to_insn[insn_id].depends_on + + for dep_id in dependee_ids: if dep_id in except_insn_ids: continue yield dep_id - yield from gen_dependencies_except(kernel, dep_id, except_insn_ids) + yield from gen_dependencies_except( + kernel, dep_id, except_insn_ids, simplified_depends_on_graph) def get_priority_tiers(wanted, priorities): @@ -631,6 +685,8 @@ class SchedulerState(ImmutableRecord): order with instruction priorities as tie breaker. 
""" + # TODO document simplified_depends_on_graph + @property def last_entered_loop(self): if self.active_inames: @@ -641,12 +697,20 @@ def last_entered_loop(self): # }}} -def get_insns_in_topologically_sorted_order(kernel): +def get_insns_in_topologically_sorted_order( + kernel, simplified_depends_on_graph): from pytools.graph import compute_topological_order rev_dep_map = {insn.id: set() for insn in kernel.instructions} for insn in kernel.instructions: - for dep in insn.depends_on: + + if kernel.options.use_dependencies_v2: + dependee_ids = simplified_depends_on_graph.get( + insn.id, set()) + else: + dependee_ids = insn.depends_on + + for dep in dependee_ids: rev_dep_map[dep].add(insn.id) # For breaking ties, we compare the features of an intruction @@ -680,7 +744,8 @@ def key(insn_id): # {{{ schedule_as_many_run_insns_as_possible -def schedule_as_many_run_insns_as_possible(sched_state, template_insn): +def schedule_as_many_run_insns_as_possible( + sched_state, template_insn, use_dependencies_v2): """ Returns an instance of :class:`loopy.schedule.SchedulerState`, by appending all reachable instructions that are similar to *template_insn*. 
We define @@ -748,7 +813,14 @@ def is_similar_to_template(insn): if is_similar_to_template(insn): # check reachability - if not (insn.depends_on & ignored_unscheduled_insn_ids): + + if use_dependencies_v2: + dependee_ids = sched_state.simplified_depends_on_graph.get( + insn.id, set()) + else: + dependee_ids = insn.depends_on + + if not (dependee_ids & ignored_unscheduled_insn_ids): if insn.id in sched_state.prescheduled_insn_ids: if next_preschedule_insn_id() == insn.id: preschedule.pop(0) @@ -937,7 +1009,14 @@ def insn_sort_key(insn_id): for insn_id in insn_ids_to_try: insn = kernel.id_to_insn[insn_id] - is_ready = insn.depends_on <= sched_state.scheduled_insn_ids + # make sure dependees have been scheduled + if kernel.options.use_dependencies_v2: + dependee_ids = sched_state.simplified_depends_on_graph.get( + insn.id, set()) + else: + dependee_ids = insn.depends_on + + is_ready = dependee_ids <= sched_state.scheduled_insn_ids if not is_ready: continue @@ -1068,8 +1147,8 @@ def insn_sort_key(insn_id): insns_in_topologically_sorted_order=new_toposorted_insns, ) - new_sched_state = schedule_as_many_run_insns_as_possible(new_sched_state, - insn) + new_sched_state = schedule_as_many_run_insns_as_possible( + new_sched_state, insn, kernel.options.use_dependencies_v2) # Don't be eager about entering/leaving loops--if progress has been # made, revert to top of scheduler and see if more progress can be @@ -1116,8 +1195,10 @@ def insn_sort_key(insn_id): # check if there's a dependency of insn that needs to be # outside of last_entered_loop. 
- for subdep_id in gen_dependencies_except(kernel, insn_id, - sched_state.scheduled_insn_ids): + for subdep_id in gen_dependencies_except( + kernel, insn_id, + sched_state.scheduled_insn_ids, + sched_state.simplified_depends_on_graph): want = (kernel.insn_inames(subdep_id) - sched_state.parallel_inames) if ( @@ -1754,10 +1835,10 @@ def _insn_ids_reaching_end(schedule, kind, reverse): return insn_ids_alive_at_scope[-1] -def append_barrier_or_raise_error(kernel_name, schedule, dep, verify_only): +def append_barrier_or_raise_error( + kernel_name, schedule, dep, verify_only, use_dependencies_v2=False): if verify_only: - from loopy.diagnostic import MissingBarrierError - raise MissingBarrierError( + err_str = ( "%s: Dependency '%s' (for variable '%s') " "requires synchronization " "by a %s barrier (add a 'no_sync_with' " @@ -1769,6 +1850,14 @@ def append_barrier_or_raise_error(kernel_name, schedule, dep, verify_only): tgt=dep.target.id, src=dep.source.id), dep.variable, dep.var_kind)) + # TODO need to update all this with v2 deps. For now, make this a warning. + # Do full fix for this later + if use_dependencies_v2: + from warnings import warn + warn(err_str) + else: + from loopy.diagnostic import MissingBarrierError + raise MissingBarrierError(err_str) else: comment = "for {} ({})".format( dep.variable, dep.dep_descr.format( @@ -1836,7 +1925,8 @@ def insert_barriers_at_outer_level(schedule, reverse=False): dep_tracker.gen_dependencies_with_target_at(insn) for insn in loop_head): append_barrier_or_raise_error( - kernel.name, result, dep, verify_only) + kernel.name, result, dep, verify_only, + kernel.options.use_dependencies_v2) # This barrier gets inserted outside the loop, hence it is # executed unconditionally and so kills all sources before # the loop. 
@@ -1869,7 +1959,8 @@ def insert_barriers_at_outer_level(schedule, reverse=False): for dep in dep_tracker.gen_dependencies_with_target_at( sched_item.insn_id): append_barrier_or_raise_error( - kernel.name, result, dep, verify_only) + kernel.name, result, dep, verify_only, + kernel.options.use_dependencies_v2) dep_tracker.discard_all_sources() break result.append(sched_item) @@ -1998,13 +2089,32 @@ def generate_loop_schedules_inner(kernel, callables_table, debug_args=None): loop_nest_with_map = find_loop_nest_with_map(kernel) loop_nest_around_map = find_loop_nest_around_map(kernel) + + # {{{ create simplified dependency graph with edge from depender* to + # dependee* iff intersection (SAME_map & DEP_map) is not empty + + if kernel.options.use_dependencies_v2: + from loopy.schedule.checker.dependency import ( + filter_deps_by_intersection_with_SAME, + ) + + # Get dep graph edges with edges FROM depender TO dependee + simplified_depends_on_graph = filter_deps_by_intersection_with_SAME(kernel) + else: + simplified_depends_on_graph = None + + # }}} + sched_state = SchedulerState( kernel=kernel, loop_nest_around_map=loop_nest_around_map, loop_insn_dep_map=find_loop_insn_dep_map( kernel, loop_nest_with_map=loop_nest_with_map, - loop_nest_around_map=loop_nest_around_map), + loop_nest_around_map=loop_nest_around_map, + simplified_depends_on_graph=simplified_depends_on_graph, + ), + simplified_depends_on_graph=simplified_depends_on_graph, breakable_inames=ilp_inames, ilp_inames=ilp_inames, vec_inames=vec_inames, @@ -2034,7 +2144,8 @@ def generate_loop_schedules_inner(kernel, callables_table, debug_args=None): active_group_counts={}, insns_in_topologically_sorted_order=( - get_insns_in_topologically_sorted_order(kernel)), + get_insns_in_topologically_sorted_order( + kernel, simplified_depends_on_graph)), ) schedule_gen_kwargs = {} diff --git a/loopy/schedule/checker/dependency.py b/loopy/schedule/checker/dependency.py new file mode 100644 index 000000000..47199a243 --- 
/dev/null +++ b/loopy/schedule/checker/dependency.py @@ -0,0 +1,138 @@ +__copyright__ = "Copyright (C) 2019 James Stevens" + +__license__ = """ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +""" + +import islpy as isl + + +class DependencyType: + """Strings specifying a particular type of dependency relationship. + + .. attribute:: SAME + + A :class:`str` specifying the following dependency relationship: + + If ``S = {i, j, ...}`` is a set of inames used in both statements + ``insn0`` and ``insn1``, and ``{i', j', ...}`` represent the values + of the inames in ``insn0``, and ``{i, j, ...}`` represent the + values of the inames in ``insn1``, then the dependency + ``insn0 happens before insn1 iff SAME({i, j})`` specifies that + ``insn0 happens before insn1 iff {i' = i and j' = j and ...}``. + Note that ``SAME({}) = True``. + + .. 
attribute:: PRIOR + + A :class:`str` specifying the following dependency relationship: + + If ``S = {i, j, k, ...}`` is a set of inames used in both statements + ``insn0`` and ``insn1``, and ``{i', j', k', ...}`` represent the values + of the inames in ``insn0``, and ``{i, j, k, ...}`` represent the + values of the inames in ``insn1``, then the dependency + ``insn0 happens before insn1 iff PRIOR({i, j, k})`` specifies one of + two possibilities, depending on whether the loop nest ordering is + known. If the loop nest ordering is unknown, then + ``insn0 happens before insn1 iff {i' < i and j' < j and k' < k ...}``. + If the loop nest ordering is known, the condition becomes + ``{i', j', k', ...}`` is lexicographically less than ``{i, j, k, ...}``, + i.e., ``i' < i or (i' = i and j' < j) or (i' = i and j' = j and k' < k) ...``. + + """ + + SAME = "same" + PRIOR = "prior" + + +def filter_deps_by_intersection_with_SAME(knl): + # Determine which dep relations have a non-empty intersection with + # the SAME relation + # TODO document + + from loopy.schedule.checker.utils import ( + append_mark_to_strings, + partition_inames_by_concurrency, + create_elementwise_comparison_conjunction_set, + convert_map_to_set, + convert_set_back_to_map, + ) + from loopy.schedule.checker.schedule import ( + BEFORE_MARK, + ) + _, non_conc_inames = partition_inames_by_concurrency(knl) + + # NOTE: deps filtered will map depender->dependee + deps_filtered = {} + for stmt in knl.instructions: + + if hasattr(stmt, "dependencies") and stmt.dependencies: + + depender_id = stmt.id + + for dependee_id, dep_maps in stmt.dependencies.items(): + + # Continue if we've been told to ignore this dependee + if stmt.non_linearizing_deps is None: + dependees_to_ignore = set() + else: + dependees_to_ignore = stmt.non_linearizing_deps + if dependee_id in dependees_to_ignore: + # TODO better fix for this...? 
+ continue + + # Continue if we already have this pair + if depender_id in deps_filtered.keys() and ( + dependee_id in deps_filtered[depender_id]): + continue + + for dep_map in dep_maps: + # Create isl map representing "SAME" dep for these two insns + + # Get shared nonconcurrent inames + depender_inames = knl.id_to_insn[depender_id].within_inames + dependee_inames = knl.id_to_insn[dependee_id].within_inames + shared_nc_inames = ( + depender_inames & dependee_inames & non_conc_inames) + + # Temporarily convert to set + dep_set_space, n_in_dims, n_out_dims = convert_map_to_set( + dep_map.space) + + # Create SAME relation + same_set_affs = isl.affs_from_space(dep_set_space) + same_set = create_elementwise_comparison_conjunction_set( + shared_nc_inames, + append_mark_to_strings(shared_nc_inames, BEFORE_MARK), + same_set_affs) + + # Convert back to map + same_map = convert_set_back_to_map( + same_set, n_in_dims, n_out_dims) + + # Don't need to intersect same_map with iname bounds (I think..?) 
+ + # See whether the intersection of dep map and SAME is empty + intersect_dep_and_same = same_map & dep_map + intersect_not_empty = not bool(intersect_dep_and_same.is_empty()) + + if intersect_not_empty: + deps_filtered.setdefault(depender_id, set()).add(dependee_id) + break # No need to check any more deps for this pair + + return deps_filtered diff --git a/loopy/schedule/checker/utils.py b/loopy/schedule/checker/utils.py index 7f95107b7..5d0858dfb 100644 --- a/loopy/schedule/checker/utils.py +++ b/loopy/schedule/checker/utils.py @@ -284,6 +284,7 @@ def sorted_union_of_names_in_isl_sets( def convert_map_to_set(isl_map): + # also works for spaces n_in_dims = len(isl_map.get_var_names(dt.in_)) n_out_dims = len(isl_map.get_var_names(dt.out)) return isl_map.move_dims( @@ -291,6 +292,11 @@ def convert_map_to_set(isl_map): ).domain(), n_in_dims, n_out_dims +def convert_set_back_to_map(isl_set, n_old_in_dims, n_old_out_dims): + return isl.Map.from_domain( + isl_set).move_dims(dt.out, 0, dt.in_, n_old_in_dims, n_old_out_dims) + + def create_symbolic_map_from_tuples( tuple_pairs_with_domains, space, diff --git a/test/test_linearization_checker.py b/test/test_linearization_checker.py index d9324f12b..8d96e09a0 100644 --- a/test/test_linearization_checker.py +++ b/test/test_linearization_checker.py @@ -148,6 +148,13 @@ def _process_and_linearize(prog, knl_name="loopy_kernel"): proc_prog[knl_name], proc_prog.callables_table) return lin_prog.linearization, proc_prog[knl_name], lin_prog + +def _get_runinstruction_ids_from_linearization(lin_items): + from loopy.schedule import RunInstruction + return [ + lin_item.insn_id for lin_item in lin_items + if isinstance(lin_item, RunInstruction)] + # }}} @@ -1620,10 +1627,8 @@ def test_sios_with_matmul(): lin_items, proc_knl, lin_knl = _process_and_linearize(knl) # Get ALL statement id pairs - from loopy.schedule import RunInstruction - all_stmt_ids = [ - lin_item.insn_id for lin_item in lin_items - if isinstance(lin_item, 
RunInstruction)] + all_stmt_ids = _get_runinstruction_ids_from_linearization(lin_items) + from itertools import product stmt_id_pairs = [] for idx, sid in enumerate(all_stmt_ids): @@ -2820,6 +2825,206 @@ def test_add_prefetch_with_dependencies(): # }}} + +# {{{ Dependency handling during linearization + +# {{{ test_filtering_deps_by_same + +def test_filtering_deps_by_same(): + + # Make a kernel (just need something that can carry deps) + knl = lp.make_kernel( + "{[i,j,k,m] : 0 <= i,j,k,m < n}", + """ + a[i,j,k,m] = 5 {id=s5} + a[i,j,k,m] = 4 {id=s4} + a[i,j,k,m] = 3 {id=s3} + a[i,j,k,m] = 2 {id=s2} + a[i,j,k,m] = 1 {id=s1} + """) + knl = lp.add_and_infer_dtypes(knl, {"a": np.float32}) + knl = lp.tag_inames(knl, "m:l.0") + + # Make some deps + + def _dep_with_condition(stmt_before, stmt_after, cond): + sid_after = 0 if stmt_before == stmt_after else 1 + return _isl_map_with_marked_dims( + "[n] -> {{" + "[{0}'=0, i', j', k', m'] -> [{0}={1}, i, j, k, m] : " + "0 <= i,j,k,m,i',j',k',m' < n and {2}" + "}}".format( + STATEMENT_VAR_NAME, sid_after, cond)) + + dep_s2_on_s1_1 = _dep_with_condition(2, 1, "i'< i and j'<=j and k'=k and m't5 = 5 {id=s5} + <>t3 = 3 {id=s3} + <>t4 = 4 {id=s4} + <>t1 = 1 {id=s1} + <>t2 = 2 {id=s2} + end + """) + knl = lp.tag_inames(knl, "m:l.0") + + stmt_ids_ordered_desired = ["s1", "s2", "s3", "s4", "s5"] + + # {{{ Add some deps + + def _dep_with_condition(stmt_before, stmt_after, cond): + sid_after = 0 if stmt_before == stmt_after else 1 + return _isl_map_with_marked_dims( + "[n] -> {{" + "[{0}'=0, i', j', k', m'] -> [{0}={1}, i, j, k, m] : " + "0 <= i,j,k,m,i',j',k',m' < n and {2}" + "}}".format( + STATEMENT_VAR_NAME, sid_after, cond)) + + # Should NOT create an edge: + dep_s2_on_s1_1 = _dep_with_condition(2, 1, "i'< i and j'<=j and k' =k and m'=m") + # Should create an edge: + dep_s2_on_s1_2 = _dep_with_condition(2, 1, "i'<=i and j'<=j and k' =k and m'=m") + # Should NOT create an edge: + dep_s2_on_s2_1 = _dep_with_condition(2, 2, "i'< i and 
j'<=j and k' =k and m'=m") + # Should NOT create an edge: + dep_s2_on_s2_2 = _dep_with_condition(2, 2, "i'<=i and j'<=j and k'< k and m'=m") + # Should create an edge: + dep_s3_on_s2_1 = _dep_with_condition(3, 2, "i'<=i and j'<=j and k' =k and m'=m") + # Should create an edge: + dep_s4_on_s3_1 = _dep_with_condition(4, 3, "i'<=i and j'<=j and k' =k and m'=m") + # Should create an edge: + dep_s5_on_s4_1 = _dep_with_condition(5, 4, "i' =i and j' =j and k' =k and m'=m") + + knl = lp.add_dependency_v2(knl, "s2", "s1", dep_s2_on_s1_1) + knl = lp.add_dependency_v2(knl, "s2", "s1", dep_s2_on_s1_2) + knl = lp.add_dependency_v2(knl, "s2", "s2", dep_s2_on_s2_1) + knl = lp.add_dependency_v2(knl, "s2", "s2", dep_s2_on_s2_2) + knl = lp.add_dependency_v2(knl, "s3", "s2", dep_s3_on_s2_1) + knl = lp.add_dependency_v2(knl, "s4", "s3", dep_s4_on_s3_1) + knl = lp.add_dependency_v2(knl, "s5", "s4", dep_s5_on_s4_1) + + # }}} + + # {{{ Test filteringn of deps by intersection with SAME + + from loopy.schedule.checker.dependency import ( + filter_deps_by_intersection_with_SAME, + ) + filtered_depends_on_dict = filter_deps_by_intersection_with_SAME( + knl["loopy_kernel"]) + + # Make sure filtered edges are correct + + # (m is concurrent so shouldn't matter) + depends_on_dict_expected = { + "s2": set(["s1"]), + "s3": set(["s2"]), + "s4": set(["s3"]), + "s5": set(["s4"]), + } + + assert filtered_depends_on_dict == depends_on_dict_expected + + # }}} + + # {{{ Get a linearization WITHOUT using the simplified dep graph + + knl = lp.set_options(knl, use_dependencies_v2=False) + lin_items, proc_knl, lin_knl = _process_and_linearize(knl) + + # Check stmt order (should be wrong) + stmt_ids_ordered = _get_runinstruction_ids_from_linearization(lin_items) + assert stmt_ids_ordered != stmt_ids_ordered_desired + + # Check dep satisfaction (should not all be satisfied) + unsatisfied_deps = lp.find_unsatisfied_dependencies(proc_knl, lin_items) + assert unsatisfied_deps + + # }}} + + # {{{ Get a 
linearization using the simplified dep graph + + knl = lp.set_options(knl, use_dependencies_v2=True) + lin_items, proc_knl, lin_knl = _process_and_linearize(knl) + + # Check stmt order + stmt_ids_ordered = _get_runinstruction_ids_from_linearization(lin_items) + assert stmt_ids_ordered == stmt_ids_ordered_desired + + # Check dep satisfaction + unsatisfied_deps = lp.find_unsatisfied_dependencies(proc_knl, lin_items) + assert not unsatisfied_deps + + # }}} + +# }}} + +# }}} + # }}}